{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from matplotlib.font_manager import FontProperties\n",
    "import matplotlib.lines as mlines\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import operator\n",
    "import jieba\n",
    "import os\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cut_words(file_path):\n",
    "    \"\"\"\n",
    "    对文本进行切词\n",
    "    :param file_path: txt文本路径\n",
    "    :return: 用空格分词的字符串\n",
    "    \"\"\"\n",
    "    text_with_spaces = ''\n",
    "    text=open(file_path, 'r', encoding='gb18030').read()\n",
    "    textcut = jieba.cut(text)\n",
    "    words=[]\n",
    "    for word in textcut:\n",
    "        text_with_spaces += word + ' '\n",
    "        words.append(word)\n",
    "    return text_with_spaces"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def loadfile(file_dir, label):\n",
    "    \"\"\"\n",
    "    将路径下的所有文件加载\n",
    "    :param file_dir: 保存txt文件目录\n",
    "    :param label: 文档标签\n",
    "    :return: 分词后的文档列表和标签\n",
    "    \"\"\"\n",
    "    file_list = os.listdir(file_dir)\n",
    "    words_list = []\n",
    "    labels_list = []\n",
    "    for file in file_list:\n",
    "        file_path = file_dir + '/' + file\n",
    "        words_list.append(cut_words(file_path))\n",
    "        labels_list.append(label)                                                                                                                 \n",
    "    return words_list, labels_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def stopwordslist(filepath2):    # 定义函数创建停用词列表\n",
    "    stopword = [line.strip() for line in open(filepath2, encoding='utf8', errors='ignore').readlines()]    #以行的形式读取停用词表，同时转换为列表\n",
    "    return stopword"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "stopwords = stopwordslist('text classification/stop/stopword.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['\\ufeff,',\n",
       " '?',\n",
       " '、',\n",
       " '。',\n",
       " '“',\n",
       " '”',\n",
       " '《',\n",
       " '》',\n",
       " '！',\n",
       " '，',\n",
       " '：',\n",
       " '；',\n",
       " '？',\n",
       " '人民',\n",
       " '#',\n",
       " '###',\n",
       " '啊',\n",
       " '阿',\n",
       " '哎',\n",
       " '哎呀',\n",
       " '哎哟',\n",
       " '唉',\n",
       " '俺',\n",
       " '俺们',\n",
       " '按',\n",
       " '按照',\n",
       " '吧',\n",
       " '吧哒',\n",
       " '把',\n",
       " '罢了',\n",
       " '被',\n",
       " '本',\n",
       " '本着',\n",
       " '比',\n",
       " '比方',\n",
       " '比如',\n",
       " '鄙人',\n",
       " '彼',\n",
       " '彼此',\n",
       " '边',\n",
       " '别',\n",
       " '别的',\n",
       " '别说',\n",
       " '并',\n",
       " '并且',\n",
       " '不比',\n",
       " '不成',\n",
       " '不单',\n",
       " '不但',\n",
       " '不独',\n",
       " '不管',\n",
       " '不光',\n",
       " '不过',\n",
       " '不仅',\n",
       " '不拘',\n",
       " '不论',\n",
       " '不怕',\n",
       " '不然',\n",
       " '不如',\n",
       " '不特',\n",
       " '不惟',\n",
       " '不问',\n",
       " '不只',\n",
       " '朝',\n",
       " '朝着',\n",
       " '趁',\n",
       " '趁着',\n",
       " '乘',\n",
       " '冲',\n",
       " '除',\n",
       " '除此之外',\n",
       " '除非',\n",
       " '除了',\n",
       " '此',\n",
       " '此间',\n",
       " '此外',\n",
       " '从',\n",
       " '从而',\n",
       " '打',\n",
       " '待',\n",
       " '但',\n",
       " '但是',\n",
       " '当',\n",
       " '当着',\n",
       " '到',\n",
       " '得',\n",
       " '的',\n",
       " '的话',\n",
       " '等',\n",
       " '等等',\n",
       " '地',\n",
       " '第',\n",
       " '叮咚',\n",
       " '对',\n",
       " '对于',\n",
       " '多',\n",
       " '多少',\n",
       " '而',\n",
       " '而况',\n",
       " '而且',\n",
       " '而是',\n",
       " '而外',\n",
       " '而言',\n",
       " '而已',\n",
       " '尔后',\n",
       " '反过来',\n",
       " '反过来说',\n",
       " '反之',\n",
       " '非但',\n",
       " '非徒',\n",
       " '否则',\n",
       " '嘎',\n",
       " '嘎登',\n",
       " '该',\n",
       " '赶',\n",
       " '个',\n",
       " '各',\n",
       " '各个',\n",
       " '各位',\n",
       " '各种',\n",
       " '各自',\n",
       " '给',\n",
       " '根据',\n",
       " '跟',\n",
       " '故',\n",
       " '故此',\n",
       " '固然',\n",
       " '关于',\n",
       " '管',\n",
       " '归',\n",
       " '果然',\n",
       " '果真',\n",
       " '过',\n",
       " '哈',\n",
       " '哈哈',\n",
       " '呵',\n",
       " '和',\n",
       " '何',\n",
       " '何处',\n",
       " '何况',\n",
       " '何时',\n",
       " '嘿',\n",
       " '哼',\n",
       " '哼唷',\n",
       " '呼哧',\n",
       " '乎',\n",
       " '哗',\n",
       " '还是',\n",
       " '还有',\n",
       " '换句话说',\n",
       " '换言之',\n",
       " '或',\n",
       " '或是',\n",
       " '或者',\n",
       " '极了',\n",
       " '及',\n",
       " '及其',\n",
       " '及至',\n",
       " '即',\n",
       " '即便',\n",
       " '即或',\n",
       " '即令',\n",
       " '即若',\n",
       " '即使',\n",
       " '几',\n",
       " '几时',\n",
       " '己',\n",
       " '既',\n",
       " '既然',\n",
       " '既是',\n",
       " '继而',\n",
       " '加之',\n",
       " '假如',\n",
       " '假若',\n",
       " '假使',\n",
       " '鉴于',\n",
       " '将',\n",
       " '较',\n",
       " '较之',\n",
       " '叫',\n",
       " '接着',\n",
       " '结果',\n",
       " '借',\n",
       " '紧接着',\n",
       " '进而',\n",
       " '尽',\n",
       " '尽管',\n",
       " '经',\n",
       " '经过',\n",
       " '就',\n",
       " '就是',\n",
       " '就是说',\n",
       " '据',\n",
       " '具体',\n",
       " '开始',\n",
       " '开外',\n",
       " '靠',\n",
       " '咳',\n",
       " '可',\n",
       " '可见',\n",
       " '可是',\n",
       " '可以',\n",
       " '况且',\n",
       " '啦',\n",
       " '来',\n",
       " '来着',\n",
       " '离',\n",
       " '例如',\n",
       " '哩',\n",
       " '连',\n",
       " '连同',\n",
       " '两者',\n",
       " '了',\n",
       " '临',\n",
       " '另',\n",
       " '另外',\n",
       " '另一方面',\n",
       " '论',\n",
       " '嘛',\n",
       " '吗',\n",
       " '慢说',\n",
       " '漫说',\n",
       " '冒',\n",
       " '么',\n",
       " '每',\n",
       " '每当',\n",
       " '们',\n",
       " '莫若',\n",
       " '某',\n",
       " '某个',\n",
       " '某些',\n",
       " '拿',\n",
       " '哪',\n",
       " '哪边',\n",
       " '哪儿',\n",
       " '哪个',\n",
       " '哪里',\n",
       " '哪年',\n",
       " '哪怕',\n",
       " '哪天',\n",
       " '哪些',\n",
       " '哪样',\n",
       " '那',\n",
       " '那边',\n",
       " '那儿',\n",
       " '那个',\n",
       " '那会儿',\n",
       " '那里',\n",
       " '那么',\n",
       " '那时',\n",
       " '那些',\n",
       " '那样',\n",
       " '乃',\n",
       " '乃至',\n",
       " '呢',\n",
       " '能',\n",
       " '你',\n",
       " '你们',\n",
       " '您',\n",
       " '宁',\n",
       " '宁可',\n",
       " '宁肯',\n",
       " '宁愿',\n",
       " '哦',\n",
       " '呕',\n",
       " '啪达',\n",
       " '旁人',\n",
       " '呸',\n",
       " '凭',\n",
       " '凭借',\n",
       " '其',\n",
       " '其次',\n",
       " '其二',\n",
       " '其他',\n",
       " '其它',\n",
       " '其一',\n",
       " '其余',\n",
       " '其中',\n",
       " '起',\n",
       " '起见',\n",
       " '岂但',\n",
       " '恰恰相反',\n",
       " '前后',\n",
       " '前者',\n",
       " '且',\n",
       " '然而',\n",
       " '然后',\n",
       " '然则',\n",
       " '让',\n",
       " '人家',\n",
       " '任',\n",
       " '任何',\n",
       " '任凭',\n",
       " '如',\n",
       " '如此',\n",
       " '如果',\n",
       " '如何',\n",
       " '如其',\n",
       " '如若',\n",
       " '如上所述',\n",
       " '若',\n",
       " '若非',\n",
       " '若是',\n",
       " '啥',\n",
       " '上下',\n",
       " '尚且',\n",
       " '设若',\n",
       " '设使',\n",
       " '甚而',\n",
       " '甚么',\n",
       " '甚至',\n",
       " '省得',\n",
       " '时候',\n",
       " '什么',\n",
       " '什么样',\n",
       " '使得',\n",
       " '是',\n",
       " '是的',\n",
       " '首先',\n",
       " '谁',\n",
       " '谁知',\n",
       " '顺',\n",
       " '顺着',\n",
       " '似的',\n",
       " '虽',\n",
       " '虽然',\n",
       " '虽说',\n",
       " '虽则',\n",
       " '随',\n",
       " '随着',\n",
       " '所',\n",
       " '所以',\n",
       " '他',\n",
       " '他们',\n",
       " '他人',\n",
       " '它',\n",
       " '它们',\n",
       " '她',\n",
       " '她们',\n",
       " '倘',\n",
       " '倘或',\n",
       " '倘然',\n",
       " '倘若',\n",
       " '倘使',\n",
       " '腾',\n",
       " '替',\n",
       " '通过',\n",
       " '同',\n",
       " '同时',\n",
       " '哇',\n",
       " '万一',\n",
       " '往',\n",
       " '望',\n",
       " '为',\n",
       " '为何',\n",
       " '为了',\n",
       " '为什么',\n",
       " '为着',\n",
       " '喂',\n",
       " '嗡嗡',\n",
       " '我',\n",
       " '我们',\n",
       " '呜',\n",
       " '呜呼',\n",
       " '乌乎',\n",
       " '无论',\n",
       " '无宁',\n",
       " '毋宁',\n",
       " '嘻',\n",
       " '吓',\n",
       " '相对而言',\n",
       " '像',\n",
       " '向',\n",
       " '向着',\n",
       " '嘘',\n",
       " '呀',\n",
       " '焉',\n",
       " '沿',\n",
       " '沿着',\n",
       " '要',\n",
       " '要不',\n",
       " '要不然',\n",
       " '要不是',\n",
       " '要么',\n",
       " '要是',\n",
       " '也',\n",
       " '也罢',\n",
       " '也好',\n",
       " '一',\n",
       " '一般',\n",
       " '一旦',\n",
       " '一方面',\n",
       " '一来',\n",
       " '一切',\n",
       " '一样',\n",
       " '一则',\n",
       " '依',\n",
       " '依照',\n",
       " '矣',\n",
       " '以',\n",
       " '以便',\n",
       " '以及',\n",
       " '以免',\n",
       " '以至',\n",
       " '以至于',\n",
       " '以致',\n",
       " '抑或',\n",
       " '因',\n",
       " '因此',\n",
       " '因而',\n",
       " '因为',\n",
       " '哟',\n",
       " '用',\n",
       " '由',\n",
       " '由此可见',\n",
       " '由于',\n",
       " '有',\n",
       " '有的',\n",
       " '有关',\n",
       " '有些',\n",
       " '又',\n",
       " '于',\n",
       " '于是',\n",
       " '于是乎',\n",
       " '与',\n",
       " '与此同时',\n",
       " '与否',\n",
       " '与其',\n",
       " '越是',\n",
       " '云云',\n",
       " '哉',\n",
       " '再说',\n",
       " '再者',\n",
       " '在',\n",
       " '在下',\n",
       " '咱',\n",
       " '咱们',\n",
       " '则',\n",
       " '怎',\n",
       " '怎么',\n",
       " '怎么办',\n",
       " '怎么样',\n",
       " '怎样',\n",
       " '咋',\n",
       " '照',\n",
       " '照着',\n",
       " '者',\n",
       " '这',\n",
       " '这边',\n",
       " '这儿',\n",
       " '这个',\n",
       " '这会儿',\n",
       " '这就是说',\n",
       " '这里',\n",
       " '这么',\n",
       " '这么点儿',\n",
       " '这么些',\n",
       " '这么样',\n",
       " '这时',\n",
       " '这些',\n",
       " '这样',\n",
       " '正如',\n",
       " '吱',\n",
       " '之',\n",
       " '之类',\n",
       " '之所以',\n",
       " '之一',\n",
       " '只是',\n",
       " '只限',\n",
       " '只要',\n",
       " '只有',\n",
       " '至',\n",
       " '至于',\n",
       " '诸位',\n",
       " '着',\n",
       " '着呢',\n",
       " '自',\n",
       " '自从',\n",
       " '自个儿',\n",
       " '自各儿',\n",
       " '自己',\n",
       " '自家',\n",
       " '自身',\n",
       " '综上所述',\n",
       " '总的来看',\n",
       " '总的来说',\n",
       " '总的说来',\n",
       " '总而言之',\n",
       " '总之',\n",
       " '纵',\n",
       " '纵令',\n",
       " '纵然',\n",
       " '纵使',\n",
       " '遵照',\n",
       " '作为',\n",
       " '兮',\n",
       " '呃',\n",
       " '呗',\n",
       " '咚',\n",
       " '咦',\n",
       " '喏',\n",
       " '啐',\n",
       " '喔唷',\n",
       " '嗬',\n",
       " '嗯',\n",
       " '嗳',\n",
       " '~',\n",
       " '!',\n",
       " '.',\n",
       " ':',\n",
       " '\"',\n",
       " \"'\",\n",
       " '(',\n",
       " ')',\n",
       " '*',\n",
       " 'A',\n",
       " '白',\n",
       " '社会主义',\n",
       " '--',\n",
       " '..',\n",
       " '>>',\n",
       " '[',\n",
       " ']',\n",
       " '',\n",
       " '<',\n",
       " '>',\n",
       " '/',\n",
       " '\\\\',\n",
       " '|',\n",
       " '-',\n",
       " '_',\n",
       " '+',\n",
       " '=',\n",
       " '&',\n",
       " '^',\n",
       " '%',\n",
       " '#',\n",
       " '@',\n",
       " '`',\n",
       " ';',\n",
       " '$',\n",
       " '（',\n",
       " '）',\n",
       " '——',\n",
       " '—',\n",
       " '￥',\n",
       " '·',\n",
       " '...',\n",
       " '‘',\n",
       " '’',\n",
       " '〉',\n",
       " '〈',\n",
       " '…',\n",
       " '',\n",
       " '0',\n",
       " '1',\n",
       " '2',\n",
       " '3',\n",
       " '4',\n",
       " '5',\n",
       " '6',\n",
       " '7',\n",
       " '8',\n",
       " '9',\n",
       " '０',\n",
       " '１',\n",
       " '２',\n",
       " '３',\n",
       " '４',\n",
       " '５',\n",
       " '６',\n",
       " '７',\n",
       " '８',\n",
       " '９',\n",
       " '二',\n",
       " '三',\n",
       " '四',\n",
       " '五',\n",
       " '六',\n",
       " '七',\n",
       " '八',\n",
       " '九',\n",
       " '零',\n",
       " '＞',\n",
       " '＜',\n",
       " '＠',\n",
       " '＃',\n",
       " '＄',\n",
       " '％',\n",
       " '︿',\n",
       " '＆',\n",
       " '＊',\n",
       " '＋',\n",
       " '～',\n",
       " '｜',\n",
       " '［',\n",
       " '］',\n",
       " '｛',\n",
       " '｝',\n",
       " '啊哈',\n",
       " '啊呀',\n",
       " '啊哟',\n",
       " '挨次',\n",
       " '挨个',\n",
       " '挨家挨户',\n",
       " '挨门挨户',\n",
       " '挨门逐户',\n",
       " '挨着',\n",
       " '按理',\n",
       " '按期',\n",
       " '按时',\n",
       " '按说',\n",
       " '暗地里',\n",
       " '暗中',\n",
       " '暗自',\n",
       " '昂然',\n",
       " '八成',\n",
       " '白白',\n",
       " '半',\n",
       " '梆',\n",
       " '保管',\n",
       " '保险',\n",
       " '饱',\n",
       " '背地里',\n",
       " '背靠背',\n",
       " '倍感',\n",
       " '倍加',\n",
       " '本人',\n",
       " '本身',\n",
       " '甭',\n",
       " '比起',\n",
       " '比如说',\n",
       " '比照',\n",
       " '毕竟',\n",
       " '必',\n",
       " '必定',\n",
       " '必将',\n",
       " '必须',\n",
       " '便',\n",
       " '别人',\n",
       " '并非',\n",
       " '并肩',\n",
       " '并没',\n",
       " '并没有',\n",
       " '并排',\n",
       " '并无',\n",
       " '勃然',\n",
       " '不',\n",
       " '不必',\n",
       " '不常',\n",
       " '不大',\n",
       " '不但...而且',\n",
       " '不得',\n",
       " '不得不',\n",
       " '不得了',\n",
       " '不得已',\n",
       " '不迭',\n",
       " '不定',\n",
       " '不对',\n",
       " '不妨',\n",
       " '不管怎样',\n",
       " '不会',\n",
       " '不仅...而且',\n",
       " '不仅仅',\n",
       " '不仅仅是',\n",
       " '不经意',\n",
       " '不可开交',\n",
       " '不可抗拒',\n",
       " '不力',\n",
       " '不了',\n",
       " '不料',\n",
       " '不满',\n",
       " '不免',\n",
       " '不能不',\n",
       " '不起',\n",
       " '不巧',\n",
       " '不然的话',\n",
       " '不日',\n",
       " '不少',\n",
       " '不胜',\n",
       " '不时',\n",
       " '不是',\n",
       " '不同',\n",
       " '不能',\n",
       " '不要',\n",
       " '不外',\n",
       " '不外乎',\n",
       " '不下',\n",
       " '不限',\n",
       " '不消',\n",
       " '不已',\n",
       " '不亦乐乎',\n",
       " '不由得',\n",
       " '不再',\n",
       " '不择手段',\n",
       " '不怎么',\n",
       " '不曾',\n",
       " '不知不觉',\n",
       " '不止',\n",
       " '不止一次',\n",
       " '不至于',\n",
       " '才',\n",
       " '才能',\n",
       " '策略地',\n",
       " '差不多',\n",
       " '差一点',\n",
       " '常',\n",
       " '常常',\n",
       " '常言道',\n",
       " '常言说',\n",
       " '常言说得好',\n",
       " '长此下去',\n",
       " '长话短说',\n",
       " '长期以来',\n",
       " '长线',\n",
       " '敞开儿',\n",
       " '彻夜',\n",
       " '陈年',\n",
       " '趁便',\n",
       " '趁机',\n",
       " '趁热',\n",
       " '趁势',\n",
       " '趁早',\n",
       " '成年',\n",
       " '成年累月',\n",
       " '成心',\n",
       " '乘机',\n",
       " '乘胜',\n",
       " '乘势',\n",
       " '乘隙',\n",
       " '乘虚',\n",
       " '诚然',\n",
       " '迟早',\n",
       " '充分',\n",
       " '充其极',\n",
       " '充其量',\n",
       " '抽冷子',\n",
       " '臭',\n",
       " '初',\n",
       " '出',\n",
       " '出来',\n",
       " '出去',\n",
       " '除此',\n",
       " '除此而外',\n",
       " '除此以外',\n",
       " '除开',\n",
       " '除去',\n",
       " '除却',\n",
       " '除外',\n",
       " '处处',\n",
       " '川流不息',\n",
       " '传',\n",
       " '传说',\n",
       " '传闻',\n",
       " '串行',\n",
       " '纯',\n",
       " '纯粹',\n",
       " '此后',\n",
       " '此中',\n",
       " '次第',\n",
       " '匆匆',\n",
       " '从不',\n",
       " '从此',\n",
       " '从此以后',\n",
       " '从古到今',\n",
       " '从古至今',\n",
       " '从今以后',\n",
       " '从宽',\n",
       " '从来',\n",
       " '从轻',\n",
       " '从速',\n",
       " '从头',\n",
       " '从未',\n",
       " '从无到有',\n",
       " '从小',\n",
       " '从新',\n",
       " '从严',\n",
       " '从优',\n",
       " '从早到晚',\n",
       " '从中',\n",
       " '从重',\n",
       " '凑巧',\n",
       " '粗',\n",
       " '存心',\n",
       " '达旦',\n",
       " '打从',\n",
       " '打开天窗说亮话',\n",
       " '大',\n",
       " '大不了',\n",
       " '大大',\n",
       " '大抵',\n",
       " '大都',\n",
       " '大多',\n",
       " '大凡',\n",
       " '大概',\n",
       " '大家',\n",
       " '大举',\n",
       " '大略',\n",
       " '大面儿上',\n",
       " '大事',\n",
       " '大体',\n",
       " '大体上',\n",
       " '大约',\n",
       " '大张旗鼓',\n",
       " '大致',\n",
       " '呆呆地',\n",
       " '带',\n",
       " '殆',\n",
       " '待到',\n",
       " '单',\n",
       " '单纯',\n",
       " '单单',\n",
       " '但愿',\n",
       " '弹指之间',\n",
       " '当场',\n",
       " '当儿',\n",
       " '当即',\n",
       " '当口儿',\n",
       " '当然',\n",
       " '当庭',\n",
       " '当头',\n",
       " '当下',\n",
       " '当真',\n",
       " '当中',\n",
       " '倒不如',\n",
       " '倒不如说',\n",
       " '倒是',\n",
       " '到处',\n",
       " '到底',\n",
       " '到了儿',\n",
       " '到目前为止',\n",
       " '到头',\n",
       " '到头来',\n",
       " '得起',\n",
       " '得天独厚',\n",
       " '的确',\n",
       " '等到',\n",
       " '叮当',\n",
       " '顶多',\n",
       " '定',\n",
       " '动不动',\n",
       " '动辄',\n",
       " '陡然',\n",
       " '都',\n",
       " '独',\n",
       " '独自',\n",
       " '断然',\n",
       " '顿时',\n",
       " '多次',\n",
       " '多多',\n",
       " '多多少少',\n",
       " '多多益善',\n",
       " '多亏',\n",
       " '多年来',\n",
       " '多年前',\n",
       " '而后',\n",
       " '而论',\n",
       " '而又',\n",
       " '尔等',\n",
       " '二话不说',\n",
       " '二话没说',\n",
       " '反倒',\n",
       " '反倒是',\n",
       " '反而',\n",
       " '反手',\n",
       " '反之亦然',\n",
       " '反之则',\n",
       " '方',\n",
       " '方才',\n",
       " '方能',\n",
       " '放量',\n",
       " '非常',\n",
       " '非得',\n",
       " '分期',\n",
       " '分期分批',\n",
       " '分头',\n",
       " '奋勇',\n",
       " '愤然',\n",
       " '风雨无阻',\n",
       " '逢',\n",
       " '弗',\n",
       " '甫',\n",
       " '嘎嘎',\n",
       " '该当',\n",
       " '概',\n",
       " '赶快',\n",
       " '赶早不赶晚',\n",
       " '敢',\n",
       " '敢情',\n",
       " '敢于',\n",
       " '刚',\n",
       " '刚才',\n",
       " '刚好',\n",
       " '刚巧',\n",
       " '高低',\n",
       " '格外',\n",
       " '隔日',\n",
       " '隔夜',\n",
       " '个人',\n",
       " '各式',\n",
       " '更',\n",
       " '更加',\n",
       " '更进一步',\n",
       " '更为',\n",
       " '公然',\n",
       " '共',\n",
       " '共总',\n",
       " '够瞧的',\n",
       " '姑且',\n",
       " '古来',\n",
       " '故而',\n",
       " '故意',\n",
       " '固',\n",
       " '怪',\n",
       " '怪不得',\n",
       " '惯常',\n",
       " '光',\n",
       " '光是',\n",
       " '归根到底',\n",
       " '归根结底',\n",
       " '过于',\n",
       " '毫不',\n",
       " '毫无',\n",
       " '毫无保留地',\n",
       " '毫无例外',\n",
       " '好在',\n",
       " '何必',\n",
       " '何尝',\n",
       " '何妨',\n",
       " '何苦',\n",
       " '何乐而不为',\n",
       " '何须',\n",
       " '何止',\n",
       " '很',\n",
       " '很多',\n",
       " '很少',\n",
       " '轰然',\n",
       " '后来',\n",
       " '呼啦',\n",
       " '忽地',\n",
       " '忽然',\n",
       " '互',\n",
       " '互相',\n",
       " '哗啦',\n",
       " '话说',\n",
       " '还',\n",
       " '恍然',\n",
       " '会',\n",
       " '豁然',\n",
       " '活',\n",
       " '伙同',\n",
       " '或多或少',\n",
       " '或许',\n",
       " '基本',\n",
       " '基本上',\n",
       " '基于',\n",
       " '极',\n",
       " '极大',\n",
       " '极度',\n",
       " '极端',\n",
       " '极力',\n",
       " '极其',\n",
       " '极为',\n",
       " '急匆匆',\n",
       " '即将',\n",
       " '即刻',\n",
       " '即是说',\n",
       " '几度',\n",
       " '几番',\n",
       " '几乎',\n",
       " '几经',\n",
       " '既...又',\n",
       " '继之',\n",
       " '加上',\n",
       " '加以',\n",
       " '间或',\n",
       " '简而言之',\n",
       " '简言之',\n",
       " '简直',\n",
       " '见',\n",
       " '将才',\n",
       " '将近',\n",
       " '将要',\n",
       " '交口',\n",
       " '较比',\n",
       " '较为',\n",
       " '接连不断',\n",
       " '接下来',\n",
       " '皆可',\n",
       " '截然',\n",
       " '截至',\n",
       " '藉以',\n",
       " '借此',\n",
       " '借以',\n",
       " '届时',\n",
       " '仅',\n",
       " '仅仅',\n",
       " '谨',\n",
       " '进来',\n",
       " '进去',\n",
       " ...]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stopwords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'train_words_list1' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-7-bbd826729ac4>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrain_words_list1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m: name 'train_words_list1' is not defined"
     ]
    }
   ],
   "source": [
    "len(train_words_list1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Loading model from cache C:\\Users\\19939\\AppData\\Local\\Temp\\jieba.cache\n",
      "Loading model cost 0.558 seconds.\n",
      "Prefix dict has been built successfully.\n"
     ]
    }
   ],
   "source": [
    "# 训练数据\n",
    "train_words_list1, train_labels1 = loadfile('text classification/train/女性', '女性')\n",
    "train_words_list2, train_labels2 = loadfile('text classification/train/体育', '体育')\n",
    "train_words_list3, train_labels3 = loadfile('text classification/train/文学', '文学')\n",
    "train_words_list4, train_labels4 = loadfile('text classification/train/校园', '校园')\n",
    "\n",
    "train_words_list = train_words_list1 + train_words_list2 + train_words_list3 + train_words_list4\n",
    "train_labels = train_labels1 + train_labels2 + train_labels3 + train_labels4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stopwords = stopwordslist('text classification/stop/stopword.txt')\n",
    "words_list=np.zeros(len(train_words_list1))\n",
    "words1 = []\n",
    "for i in range(len(train_words_list1)):\n",
    "    for word in train_words_list1[i]:\n",
    "        if word not in stopwords:\n",
    "            if word != '\\t':\n",
    "                words1.append(word)\n",
    "    words_list.append(words1)\n",
    "    print('\\n【去除停用词后的分词：】'+ '\\n'+ words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "words_list.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 测试数据\n",
    "test_words_list1, test_labels1 = loadfile('text classification/test/女性', '女性')\n",
    "test_words_list2, test_labels2 = loadfile('text classification/test/体育', '体育')\n",
    "test_words_list3, test_labels3 = loadfile('text classification/test/文学', '文学')\n",
    "test_words_list4, test_labels4 = loadfile('text classification/test/校园', '校园')\n",
    "\n",
    "test_words_list = test_words_list1 + test_words_list2 + test_words_list3 + test_words_list4\n",
    "test_labels = test_labels1 + test_labels2 + test_labels3 + test_labels4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "stop_words = open('text classification/stop/stopword.txt', 'r', encoding='utf-8').read()\n",
    "stop_words = stop_words.encode('utf-8').decode('utf-8-sig') # 列表头部\\ufeff处理\n",
    "stop_words = stop_words.split('\\n') # 根据分隔符分隔"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 计算单词权重\n",
    "tf = TfidfVectorizer(stop_words=stop_words, max_df=0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_features = tf.fit_transform(train_words_list)\n",
    "# 上面fit过了，这里transform\n",
    "test_features = tf.transform(test_words_list) \n",
    "# train_features.shape\n",
    "# tf.get_feature_names"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "以下为KNN算法训练上面处理好的数据，分为手写算法实现与sklearn库实现"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1.手写算法实现  准确率为： 0.88"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data=train_features.toarray()\n",
    "test_data=test_features.toarray()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "函数说明:kNN算法,分类器\n",
    " \n",
    "Parameters:\n",
    "    inX - 用于分类的数据(测试集)\n",
    "    dataSet - 用于训练的数据(训练集)\n",
    "    labes - 分类标签\n",
    "    k - kNN算法参数,选择距离最小的k个点\n",
    "Returns:\n",
    "    sortedClassCount[0][0] - 分类结果\n",
    " \n",
    "\"\"\"\n",
    "def classify0(inX, dataSet, labels, k):\n",
    "    #numpy函数shape[0]返回dataSet的行数\n",
    "    dataSetSize = dataSet.shape[0]\n",
    "    #在列向量方向上重复inX共1次(横向),行向量方向上重复inX共dataSetSize次(纵向)\n",
    "    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet\n",
    "    #二维特征相减后平方\n",
    "    sqDiffMat = diffMat**2\n",
    "    #sum()所有元素相加,sum(0)列相加,sum(1)行相加\n",
    "    sqDistances = sqDiffMat.sum(axis=1)\n",
    "    #开方,计算出距离\n",
    "    distances = sqDistances**0.5\n",
    "    #返回distances中元素从小到大排序后的索引值\n",
    "    sortedDistIndices = distances.argsort()\n",
    "    #定一个记录类别次数的字典\n",
    "    classCount = {}\n",
    "    for i in range(k):\n",
    "        #取出前k个元素的类别\n",
    "        voteIlabel = labels[sortedDistIndices[i]]\n",
    "        #dict.get(key,default=None),字典的get()方法,返回指定键的值,如果值不在字典中返回默认值。\n",
    "        #计算类别次数\n",
    "        classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1\n",
    "    #reverse降序排序字典\n",
    "    sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)\n",
    "    #返回次数最多的类别,即所要分类的类别\n",
    "    return sortedClassCount[0][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=[]\n",
    "b=[]\n",
    "c=[]\n",
    "d=[]\n",
    "pre=[]\n",
    "def datingClassTest():\n",
    "    #分类错误计数\n",
    "    errorCount = 0.0\n",
    "    for i in range(test_features.shape[0]):\n",
    "        \n",
    "        #前numTestVecs个数据作为测试集,后m-numTestVecs个数据作为训练集\n",
    "        classifierResult = classify0(test_data[i], train_data,train_labels, 4)\n",
    "        pre.append(classifierResult)\n",
    "        print(\"分类结果: \",classifierResult,\"真实类别: \",test_labels[i])\n",
    "        if test_labels[i]=='女性':\n",
    "            a.append(classifierResult)\n",
    "        if test_labels[i]=='体育':\n",
    "            b.append(classifierResult)\n",
    "        if test_labels[i]=='文学':\n",
    "            c.append(classifierResult)\n",
    "        if test_labels[i]=='校园':\n",
    "            d.append(classifierResult)\n",
    "        if classifierResult != test_labels[i]:\n",
    "            errorCount += 1.0\n",
    "    print(\"正确率:%f%%\" %((len(test_labels)-errorCount)/float(len(test_labels))*100))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  文学 真实类别:  女性\n",
      "分类结果:  文学 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  体育 真实类别:  女性\n",
      "分类结果:  体育 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  体育 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  体育 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  体育 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  文学 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  女性\n",
      "分类结果:  女性 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  文学 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  文学 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  校园 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  文学 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  女性 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  体育 真实类别:  体育\n",
      "分类结果:  女性 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  体育 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  校园 真实类别:  文学\n",
      "分类结果:  校园 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  文学 真实类别:  文学\n",
      "分类结果:  体育 真实类别:  校园\n",
      "分类结果:  校园 真实类别:  校园\n",
      "分类结果:  女性 真实类别:  校园\n",
      "分类结果:  校园 真实类别:  校园\n",
      "分类结果:  校园 真实类别:  校园\n",
      "分类结果:  校园 真实类别:  校园\n",
      "分类结果:  女性 真实类别:  校园\n",
      "分类结果:  校园 真实类别:  校园\n",
      "分类结果:  校园 真实类别:  校园\n",
      "分类结果:  校园 真实类别:  校园\n",
      "分类结果:  校园 真实类别:  校园\n",
      "分类结果:  校园 真实类别:  校园\n",
      "分类结果:  女性 真实类别:  校园\n",
      "分类结果:  女性 真实类别:  校园\n",
      "分类结果:  女性 真实类别:  校园\n",
      "分类结果:  校园 真实类别:  校园\n",
      "正确率:88.000000%\n"
     ]
    }
   ],
   "source": [
    "datingClassTest()#这里运行速度很慢，KNN效率不高"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "    \n",
    "def sta(a):\n",
    "    nv=0\n",
    "    wen=0\n",
    "    ti=0\n",
    "    xiao=0\n",
    "    for i in range(len(a)):\n",
    "        if a[i]=='女性':\n",
    "            nv=nv+1\n",
    "        if a[i]=='文学':\n",
    "            wen=wen+1\n",
    "        if a[i]=='体育':\n",
    "            ti=ti+1\n",
    "        if a[i]=='校园':\n",
    "            xiao=xiao+1\n",
    "    return nv,wen,ti,xiao \n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import confusion_matrix,roc_curve,auc,precision_recall_curve,average_precision_score\n",
    "from sklearn.model_selection import learning_curve\n",
    "import matplotlib.pyplot as plt\n",
    "# 设置matplotlib正常显示中文和负号\n",
    "plt.rcParams['font.sans-serif']=['SimHei']\n",
    "plt.rcParams['axes.unicode_minus']=False "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "def confusion_metrix(y,y_p):\n",
    "    Confusion_matrix=confusion_matrix(y,y_p) #y代表真实值，y_p 代表预测值\n",
    "    plt.matshow(Confusion_matrix)\n",
    "    plt.title(\"Beyes\")\n",
    "    plt.colorbar()\n",
    "    plt.ylabel(\"真实值\")\n",
    "    plt.xlabel(\"预测值\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQYAAAD5CAYAAADFhptEAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAUkUlEQVR4nO3dfZBddX3H8fcnSyDPIXEjEkUoEq0yFpGIIUIanGYKig9N0TAiTkWb2jJanenUUqhaRmlLHZwpleA6Aa3MIMEZohY0gMAYDRGDOnSwCD4QLUo1GLKghYS73/5xzpq79+7dPWf3nvzOvft5zZzJveeePedL2PvN7/mniMDMrNms1AGYWf04MZhZGycGM2vjxGBmbZwYzKyNE4OZtXFi6AOSPiLpn/PXJ0r6taQbJN3fdM3dkv4sv7btfIKwrcacGPqIpNnA54BLgB8AL5d02jiXdjpvBjgx9JsPAz+PiE35+2HgL8a5rtN5MwAOSx2Adc2pwBrgxU3nbgLWSzqy5dpO580Alxj6yZnAz4D1Tef2ALcA72i5ttN5M8CJoZ/cBFwI/I2kuU3nr2H8akOn82ZODH3kxxFxF/Aj4M9HT0bEN4EGcHLzxZ3Om4HbGPrRx4Ahst6JUZuAq8e5ttN5q9gfnzk/Hv91o9C1993/zLaIOKvikMZwYugzEXGrpP8lq1Zcl5++HrhinMs7nbeK7fl1g29te0Gha2cf/aPBisNp48TQByLiIy3vT2l5/ySwcJyfG/e8HQpBI0ZSB9GRE4NZAgGMUN9FkpwYzBIZwSUGM2sSBI0aL6voxGCWiKsSZjUhaSlwCvDdiNiTKo4AGjVODB7gVCOSjpK0PXUcRUhaLOkrkm6XdLOkw1PHNBlJR5MNBT8VuEvSslSxBHAgRgodKfRtYpC0WdIOSZemjqUISUuAzwLzU8dS0PnAlRGxDngMOKQDcKboROADEfExYBvwypTBjBQ8UujLxCBpPTAQEauB5ZJWpI6pgAawgWxKdO1FxNURcXv+dhnwy5TxFBERd0TETklryEoN9ySLhaBR8EihX9sY1gJb8td3AqcDDyeLpoCIGAaQlDqUUvIFX5ZExM7UsRSh7C94A3CALBmnEdCobxNDf5YYyIrjj+avh4GjEsbSt/KGvKvIhl/3hMhcBOwAzkkWB/WuSvRrieEpYHTq8QL6NwEmkzc2bgEujojdqeMpQtIHgV9ExH8ARwJPJIyGBvUtHfbrF+Y+suoDwEnAI+lC6VvvIuv2uyRfUHZD6oAKGAIukPR1YAC4LVUgAYxEsSOFfi0xbAW2S1oOnA2sShtOcRGxNnUMReTrSm6a9MIaiYi9wLrUcUCWGPbX+N/l+kY2DXlD3lpgJ3BmROxLG5FZu5FQoaOI5jEwkmZL+s+8u/7CTucm0peJAbJ/HSJiS0Q8ljoWs1bZyEcVOiYzzhiY9wK78u76cyQt7HCuo75NDGZ1FogGswodBbSOgVnLwe76HcDKDuc66uvEIGlj6hjK6LV4wTFPR4mqxKCkXU3HmPgjYrilujxed32pLvy+TgxALX4BSui1eMExT0nJqsSeiFjZdAxNcvvxuutLdeH3e2IwqynRiFmFjikYr7u+VBd+8u7KwaUDcdwxsyu59wuffxgrT5rT9Z7gh/6rmnlOc5jHolnP6Xq8VQ6znqP5LB4Y7HrMMVLdmL85zGORlnY95ifZuyciCs3YDOAAA90OYdRngVslnQG8DPgWWTWi9VxHyRPDccfM5t5tx6QOo5Szjj01dQilaHby/82ljTz9TOoQSrujcWPhEaARmmppYIJ7ZmNgImK3pHVkJYQPRUQDGO9cR733G2PWJ0YqHBIdET/nYC9Ex3OdODGYJZA1Pta3ic+JwSyJ7lclusmJwSyBbNq1E4OZNQnE/qisV2LanBjMEhlxVcLMmrnx0czaBKJRcEp1Ck4MZom48dHMxojA3ZVm1kqVjnycLicGswQC2B/1/frVNzKzPhYUX88xBScGs0TcXWlmY2T7SjgxmNkY9d6JyonBLAGXGMxsXC4xmNkYEeLASH2/fpWVZSRtzrfDurSqZ5j1qmw9BhU6UqgkMUhaDwzk22Etl7SiiueY9a5Kl4+ftqqeupaDi07eycH17IFsJ6DRXXV+9fiEi9Wa9aWs8bF7m9p2W1WJYcLtsCJiaHRXnWXPqe8qNmZV6uLelV1XVetHqe2wzGaaug+JruoLW2o7LLOZaIRZhY4UqioxbAW2S1oOnA2squg5Zj0pAg6M1LcgXUlkETFM1gC5EzizZYtusxkvq0rMKnSkUNkIi4jYS8HtsMxmIo98NLMxRrsr68qJwSwJeRKVmbXzmo9mNka2SrQTg5k1CcSzI/Ud9VvfSo5Zn+vW7EpJSyTdKmm7pGvyc9Oa3ezEYJZAlydRXQBcHxFnAAsl/S3TnN3sqoRZIiV6JQYl7Wp6PxQRQ03vHwdeIulI4BhgH+2zmx8uE5sTg1kK5aZU74mIlRN8/g3g9cD7gAeBIxg7u/mEsuG5KmGWQJdXcLoceE9EXEaWGN7GNGc3OzGYJdLFNoZ5wMslDQCvBv6Zac5udlXCLIEAnu3e7Mp/Aq4DjgXuAT7BNGc3OzGYJdDNhVoi4l7gxOZzktYC64ArpjK72YnBLJEqh0RPd3azE4NZCuHZlRN6+IGFvO5lf5g6jFKG1780dQilLP7y/alDKG+kv1cP97RrMxuXE4OZjRGIRo3XfHRiMEvE6zGY2RjhxkczG084MZjZWPXeicqJwSwRlxjMbAyPYzCzdl4M1sxaBa5KmFkbNz6a2TgiUkfQmRODWSKuSpjZGBFODGY2DrcxmFmbkREnBjNrEshVCTNrV+NOCScGsyRq3vhY2RIyko6StL2q+5v1vCh4JFBJiUHSEuCzwPwq7m/WD2ZiiaEBbCDbULONpI2SdknatT/+r6IQzOotG8sw+ZFCJSWGiBgGkMbPiPkW3kMAiw9bVuc2GLNKREB4MVgza+W5EmbWzonBzMaawQOcImJtlfc362kuMZjZGDUf4OTEYJaKSwxm1qbGJYb6dqSa9bsuD4mWdLWkN+SvN0vaIenSqYQ2YWKQNEvSuMOa88/eOpWHms14QVZiKHIUIOkM4HkR8WVJ64GBiFgNLJe0omx4k1UljgPOlfRtYElrLMAFwJayDzWzUgOcBiXtano/lI8eBkDSbODTwK2S3gSs5eD38k7gdODhMrFNlhieJZv38A/AduAoYA3wnfxBNW4+Mau54t+ePRGxcoLP3wF8H7gCeC9wEbA5/2wYOKFsaB0Tg6TDgI8CC4GjgVuAFcBLgHuBbwKnlH2gmeW61/h4Mlkp4jFJ1wOrgbn5ZwuYQlviZD+wHdjfcl20/GlmZQVopNhRwA+B4/PXK8maAE7P358EPFI2vI4lhoh4VtJtwGJgGXAVWRY6Oj/eBvyy7APNDKB4w2IBm4FrJZ0HzCZrY/iSpOXA2cCqsjecrI3hGOB7EfHx1g8kzSKrXpjZVHSpzB0RTwJvaT4naS2wDrgiIvaVvedEbQxHAJcAT0t67TiXzAIeLftAM8tVWBmPiL1Mo8dwoqrEM8DZko4HLgf+AHg/8Hh+iYAjpvpgsxmvxq10kw6JjogfA+dJOhf4aUQ8WH1YZn1udIBTTRXuxoiIL0TEg5JeM3our26Y2RQoih0pTJoYJD3cMurq8vz8nwAfriows77X48vHPxIR65re/0bSAHAx8PrpBhCNBo3hp6Z7m0Nq4U3fTh1CKb95Y++NQ5t3y/dSh1De/nKXpyoNFFEkMYSkE8nmSjyUn3s78MWI+FVlkZn1uxq3MUzUXTkbOJdsSPRLgTPIBja9Crgb+MQhiM+sPyWsJhQxURvDINkAiWcj4gvAvoh4K7ALOBJ4X/XhmfWxGrcxdEwMEfGLiLiQbIDTqcAcSecAioi/B86R9NxDFahZv+npXgmynPUQ8BngfGB0Wsdm4LxqwjKbAXqxxNDkWLJZlsPAZcC8/Pw2srYHMytJ3Z1d2XVFRj6+pPm9pH+VdGFEXCvpr6sLzazP1bhXYrI1H0/L2xV+JyJuAc6XdCTwqQpjM+tvNa5KTFZimAUMSLofeJps4lSQVS/eCdxVbXhm/avOA5wma2MYDf1xsrUXngC+BtwPvBi4vrLIzPpdD5cY3gj8lPYwIyL+ssrAzPpawq7IIjqWGPIVmuaRJQfIqhHNn4+734SZFVTjEsNEA5xGgBuBTaOnmv4UcI2kwWrDM+tfde6uLLoewyKyRSUXAmeSreb0KeA9FcVlZglN1sYwABzeutmFpDsj4hv5qk5mNhU1bmOYLDF8k5a2hdynASLi/d0OyGxGqHnj44SJISIaHc7fUE04ZjNIryYGM6uQE4OZNRM9XJWYKkmLgc/n938K2BARJVfEM+tjka4rsojSu+AWdD5wZb6I7GPAWRU9x6x31XiAUyUlhoi4uuntMrz5rVm7mVaVGCXpNGBJROxsOb8R2Agw53frvpjNLDOujQFA0lLgKuBPWz+LiCFgCGCRltb4r8esQjX+za+q8fFwsp12L46I3VU8w6yn9fDy8dPxLuAU4BJJd0vaUNFzzHpWnSdRVdX4uImDszLNbBx1bmOoqsRgZpPpYnelpKMkfTd/vVnSDkmXTjU0JwazFIomheKlio8DcyWtBwYiYjWwXNKKqYTnxGCWgEocwKCkXU3HxjH3kl4L/IZsMOFasoZ/gDuB06cSn+dKmKVSvDSwp3VNlFF5D+CHgDcDW4H5wKP5x8PACVMJzYnBLJEuNT7+HfDJiHhCEmRzk+bmny1girUCJwazVLrTFflHwGslXQS8Angh8DNgJ3AS8IOp3NSJwSyFLq3gFBFrRl9LuptsVfftkpaTrdO6air3deOjWSpdnl0ZEWsjYpisAXIncGZE7JtKaC4xmCVS1QCniNjLwZ6JKXFiMEulxiMfnRjMEqnzkGgnBrMUaj670onBLAFR7zUfnRjMUnGJwcxaKeqbGZwYzFJwG4OZjce9En1mYNGC1CGUMnfrvalDKG3guBemDqG8n5S83onBzFq5xGBmY9V8izonBrNUXGIws2YzcrdrMyvA4xjMrJVLDGY2lgc4mdl43CthZm2cGMxsrMCNj2bWzo2PZtbOicHMmnmAk5m1i3Abg5m1c6+EmbVxVcLMxgpgpL6ZwYnBLJX65oXqNrWVtFTSOkmDVT3DrJcpih0pVJIYJB0N3AKcCtwlaVkVzzHraaM9E5MdCVRVlTgR+EBE7JS0BHglsK2iZ5n1pBnX+BgRdwBIWkNWarisiueY9SoFaCY2PkoSsAE4ADRaPtsIbASYw7yqQjCrtxqPY6is8TEyFwE7gHNaPhuKiJURsXI2R1QVglmtKaLQMel9pMWSviLpdkk3Szpc0mZJOyRdOpXYqmp8/KCkd+RvjwSeqOI5Zj0rShyTOx+4MiLWAY8B5wEDEbEaWC5pRdnwqioxDAEXSPo6MADcVtFzzHpUwR6JrMQwKGlX07FxzJ0iro6I2/O3y4C3A1vy93cCp5eNrqrGx73AuirubdYvSvRK7ImIlZPeTzoNWAI8Ajyanx4GTigbW2VtDGY2iS6OY5C0FLgKuBB4Cpibf7SAKXzPnRjMUghQIwodk5F0OFnV4eKI2A3cx8Hqw0lkJYhSnBjMUule4+O7gFOASyTdTbYOzAWSrgTeSjYKuRRPojJLpEhXZBERsQnYNObe0pfI2vmuiIh9Ze/pxGCWSoXzIPIOgC2TXtiBE4NZCkGtRz46MZglIIqNakzFicEsFScGMxsjgAJdkak4MZgl4qqEmbVzYjCzsbzhjJm18m7XZjYuj2Mws1ZufDSzsQJo1LfI4MRgloQbHyf0JHv33NG4cXdFtx8E9nT9rnu7fsdR1cRbrWpi/knX79isqr/nY0td7cTQWURUtkuVpF1FlsSqi16LFxzztDgxmNkY3u3azNoFhBsfUxlKHUBJpeKVNBtoRGS/YZIOI+sdnx8RT3b4meOBvflCHkiaExFPN92PiDhQVcw1kT7mmvdK9PWajxGR/heghMnilXRGvtvQlyU9SrbW3xclPS5pK7AVWA3cIWmtpJskfUbSjZJOzm9zIXBy0223Sloj6TjgncC1ko6T9KI80Uwr5jqqTcwzcLdrq0BEbJf0L8BZwLURcTNwjaRtEfHm0eskvY5sf4EGcAnZTkWDkm4j2zJwtITxIuAZYA7wFuBVwBHAuWS/G58Exi15WBfUuPGxr0sMfeq3wKsj4mZJqyTdC+yWdI2k+yWtAl4TET/Mr7+GbJvAA8D+lntdDvw3cAfwOrKSxO+T7TX67U7VEeuGUjtRHXIuMfQQSeeT7RKufJnwrwK3km0wcg/wAuAB4KOSHsx/rEG2G1Hrvd5CtufATyJiRNJ84IL849eTlTisKgGMuI3BuuMGYC3ZJsH3Aj/Pzz+PfMBO/q/8G8iGCAmYDTybv272APD+pvdzybYyOwF4bgWxWyuXGKwbmnofAC4GXgkcDxwD/JSDX/43AS8mSwiLyNoJRpPE6L2+L2le0+2PBt6dv34ecDtWrRq3MTgx9KiIaEj6LbAbWEPWiHiPpFnA+8gaEF8BrAd+D/g0WQmx087He8h6NQBOrSxwy0QQjUbqKDpyVaLHKCsuCCAiHgAWAl8DPpf/+W6ynocngcuAjwBPA+8BfkDWuDj6GylglqQBYB/wjfx4KH/WwKH4b5qxRqLYkYBLDD0k37x0B3BDPsbg38m+3H9F1kawhSwRbCFrJ/jHiPgfSZeTVRWOAr5D1j4BWdfkIFkD5q/ynx31KrLfj89X+h81k9W4KqGocXA2MUnPj4hHm97PA56JiPqWUQ2AxQODcdqCNxa6dtvwdfcd6klfLjH0sOakkL//bapYbApq/I+yE4NZIlHjcQxODGZJeAUnM2sVQI27K50YzBIIIGq8UIvHMZilEPlCLUWOAiRtlrRD0qXdCM+JwSyRGIlCx2QkrQcGImI1sFzSiunG5nEMZglI+irZ4LIi5pCNXh011LzYjKR/A74aEbdKOhdYGBHXTSc+tzGYJRARZ3XxdvOB0TEtw2QzZKfFVQmz3vcU2ZB4gAV04XvtxGDW++7j4KzZk4BHpntDtzGY9ThJi4DtZLNrzwZWRcS+ad3TicGs90laAqwDvh4Rj037fk4MZtbKbQxm1saJwczaODGYWRsnBjNr48RgZm3+H650IoPj6vc0AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 288x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "confusion_metrix(test_labels,pre)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_PR(model,x_test,y_test):#绘制PR曲线\n",
    "    y_pro=model.predict_proba(x_test)\n",
    "    precision,recall,thresholds=precision_recall_curve(y_test,y_pro[:,1])\n",
    "    average_precision = average_precision_score(y_test, y_pro[:, 1])\n",
    "    ax2 = plt.subplot(224)\n",
    "    ax2.set_title(\"Precision_Recall Curve AP=%0.2f\"%average_precision,verticalalignment='center')\n",
    "    plt.step(precision, recall,where='post',alpha=0.2,color='r')\n",
    "    plt.fill_between(recall,precision,step='post',alpha=0.2,color='b')\n",
    "    plt.xlim([0.0, 1.0])\n",
    "    plt.ylim([0.0, 1.05])\n",
    "    plt.ylabel('Precision')\n",
    "    plt.xlabel('Recall')\n",
    " \n",
    "def plot_ROC(model,x_test,y_test):#绘制ROC和AUC，来判断模型的好坏\n",
    "    y_pro=model.predict_proba(x_test)\n",
    "    false_positive_rate,recall,thresholds=roc_curve(y_test,y_pro[:,1])\n",
    "    roc_auc=auc(false_positive_rate,recall)\n",
    "    ax3=plt.subplot(223)\n",
    "    ax3.set_title(\"Receiver Operating Characteristic\",verticalalignment='center')\n",
    "    plt.plot(false_positive_rate,recall,'b',label='AUC=%0.2f'%roc_auc)\n",
    "    plt.legend(loc='lower right')\n",
    "    plt.plot([0,1],[0,1],'r--')\n",
    "    plt.xlim([0.0,1.0])\n",
    "    plt.ylim([0.0,1.0])\n",
    "    plt.ylabel('Recall')\n",
    "    plt.xlabel('false_positive_rate')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "multiclass format is not supported",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-58-d8cbcfab3371>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0mpre\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpre\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[0mtest_labels\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest_labels\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mfalse_positive_rate\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mrecall\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mthresholds\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mroc_curve\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest_labels\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mpre\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m \u001b[1;31m#pre.shape\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\conda\\envs\\pytorch\\lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36minner_f\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m     61\u001b[0m             \u001b[0mextra_args\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mall_args\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     62\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mextra_args\u001b[0m \u001b[1;33m<=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 63\u001b[1;33m                 \u001b[1;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     64\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     65\u001b[0m             \u001b[1;31m# extra_args > 0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\conda\\envs\\pytorch\\lib\\site-packages\\sklearn\\metrics\\_ranking.py\u001b[0m in \u001b[0;36mroc_curve\u001b[1;34m(y_true, y_score, pos_label, sample_weight, drop_intermediate)\u001b[0m\n\u001b[0;32m    911\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    912\u001b[0m     \"\"\"\n\u001b[1;32m--> 913\u001b[1;33m     fps, tps, thresholds = _binary_clf_curve(\n\u001b[0m\u001b[0;32m    914\u001b[0m         y_true, y_score, pos_label=pos_label, sample_weight=sample_weight)\n\u001b[0;32m    915\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\conda\\envs\\pytorch\\lib\\site-packages\\sklearn\\metrics\\_ranking.py\u001b[0m in \u001b[0;36m_binary_clf_curve\u001b[1;34m(y_true, y_score, pos_label, sample_weight)\u001b[0m\n\u001b[0;32m    689\u001b[0m     if not (y_type == \"binary\" or\n\u001b[0;32m    690\u001b[0m             (y_type == \"multiclass\" and pos_label is not None)):\n\u001b[1;32m--> 691\u001b[1;33m         \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"{0} format is not supported\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_type\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    692\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    693\u001b[0m     \u001b[0mcheck_consistent_length\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_score\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mValueError\u001b[0m: multiclass format is not supported"
     ]
    }
   ],
   "source": [
    "pre=np.array(pre)\n",
    "test_labels=np.array(test_labels)\n",
    "false_positive_rate,recall,thresholds=roc_curve(test_labels,pre)\n",
    "#pre.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'sta' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-30-b36764c7795d>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnv\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mwen\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mti\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mxiao\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msta\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0md\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mnv\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mwen\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mti\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mxiao\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'sta' is not defined"
     ]
    }
   ],
   "source": [
    "nv,wen,ti,xiao=sta(d)\n",
    "nv,wen,ti,xiao"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 设置matplotlib正常显示中文和负号\n",
    "plt.rcParams['font.sans-serif']=['SimHei']\n",
    "plt.rcParams['axes.unicode_minus']=False "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABQAAAAIFCAYAAACNjlDKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAxOAAAMTgF/d4wjAAAZ/0lEQVR4nO3df6zd913f8de7dX60ccpU1ZVGQ5eqq2E0lfgV2sZTlQVBmillg7Gt09pifqhQ0MaUVltgqE3TRN3ULCT80FZUprCRiTQMlTAyGNoSSlWLtKGFoVAqGCFyKagtazILpyXxe3+cY3F78fU9tk9y6rcfD+kr+5zzOd/7vpGiq/v05/s91d0BAAAAAGZ6xqYHAAAAAACeOgIgAAAAAAwmAAIAAADAYAIgAAAAAAwmAAIAAADAYAIgAAAAAAy2Z9MDJMkFF1zQ+/bt2/QYAAAAAHBW+sQnPvH57r7gRK99UQTAffv25fDhw5seAwAAAADOSlX1qZ1ecwkwAAAAAAwmAAIAAADAYAIgAAAAAAwmAAIAAADAYAIgAAAAAAwmAAIAAADAYAIgAAAAAAwmAAIAAADAYAIgAAAAAAwmAAIAAADAYAIgAAAAAAwmAAIAAADAYAIgAAAAAAwmAAIAAADAYAIgAAAAAAwmAAIAAADAYAIgAAAAAAwmAAIAAADAYAIgAAAAAAwmAAIAAADAYAIgAAAAAAy2Z9MDAAAAAMPd8CWbnoBz3Q2PbnqCjbIDEAAAAAAGEwABAAAAYDABEAAAAAAGEwABAAAAYDABEAAAAAAGEwABAAAAYDABEAAAAAAGEwABAAAAYDABEAAAAAAGEwABAAAAYLA9mx4AeBrc8CWbnoBz3Q2PbnoCAACAc5YdgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIP5EJCnwaXX/9KmR+Ac9/CFm54AAAAA2BQ7AAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgMAEQAAAAAAYTAAEAAABgsJUCYFXdXFUPVtW9VbXvJOveVVXvr6oHquofrG9MAAAAAOB07BoAq+qaJFckuTzJLUlu2mHdZUle2t2vSvINSW5b35gAAAAAwOlYZQfg1Unu7O5jSe7LIgaeyJ8luWS5Q/Drk3x8pxNW1XVVdfj4ceTIkVOdGwAAAABYwSoB8OIkjyRJd3eSi3ZY96dJHkjypiSvS/KenU7Y3bd29yXHj717957a1AAAAADASvassOaxfGH0O3+HdW9I8r+7+/YkqaoPVNX/6u4/PcMZAQAAAIDTtMoOwENJrkqSqtqfZKfrdS9K8lVV9YyqujTJ/iRPrmNIAAAAAOD0rBIA70lyoKpuS/LeJLdX1cGqOrht3X9O8rwkn03yG0ne1d2fXt+oAAAAAMCp2vUS4O5+vKoOJLk2yV3dfWiHdY8mec2a5wMAAAAAzsAq9wBMdx9NcvdTPAsAAAAAsGarXAIMAAAAAJylBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGEwABAAAAIDBBEAAAAAAGGzPpgcAAADgqXXp9b+06RE4xz184aYngHObHYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMJgACAAAAACDCYAAAAAAMNhKAbCqbq6qB6vq3qrat8vav1VVH6qqC9YzIgAAAABwunYNgFV1TZIrklye5JYkN51k7flJfjrJ93b359Y1JAAAAABwelbZAXh1kju7+1iS+7KIgTt5x/LPv1NVX7HToqq6rqoOHz+OHDmy+sQAAAAAwMpWCYAXJ3kkSbq7k1x0okVV9eIkB5O8McnPJ7lzpwjY3bd29yXHj717957O7AAAAADALlYJgI/lC6Pf+Tus++okv9zdH+3u/5PkfyZ51RnOBwAAAACcgVUC4KEkVyVJVe1PstP1ug8l2V9Vz6yqPUlemeT31zIlAAAAAHBaVgmA9yQ5UFW3JXlvktur6mBVHdy6qLsfWq79YJLfTvKRLO4ZCAAAAABsyJ7dFnT341V1IMm1Se7q7kMnWfvOJO9c43wAAAAAwBnYNQAmSXcfTXL3UzwLAAAAALBmq1wCDAAAAACcpQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhspQBYVTdX1YNVdW9V7Vth/Zuq6o4zng4AAAAAOCO7BsCquibJFUkuT3JLkpt2Wf+SJDeuZToAAAAA4IyssgPw6iR3dvexJPdlEQNPqKr2JHlPdo+E11XV4ePHkSNHTmVmAAAAAGBFqwTAi5M8kiTd3UkuOsnaf53kniS/dbITdvet3X3J8WPv3r2rzgsAAAAAnIJVAuBj+cLod/6JFlXV5UlenuTWNcwFAAAAAKzBKgHwUJKrkqSq9ifZ6Xrdf5jkeVlcJnxbkldX1dvXMCMAAAAAcJr2rLDmniTXV9VtSa5McntVHUyS7r7j+KLu/pfH/15VVyY52N1vW9ukAAAAAMAp2zUAdvfjVXUgybVJ7uruQyu85/4k95/xdAAAAADAGVllB2C6+2iSu5/iWQAAAACANVvlHoAAAAAAwFlKAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwQRAAAAAABhMAAQAAACAwVYKgFV1c1U9WFX3VtW+k6x7Z1V9oKoeqKofWN+YAAAAAMDp2DUAVtU1Sa5IcnmSW5LctMO6r0ny7O7+20muSvL2qnrmGmcFAAAAAE7RKjsAr05yZ3cfS3JfFjHwr+ju3+zu47v+npvk/3b3kydaW1XXVdXh48eRI0dOZ3YAAAAAYBerBMCLkzySJN3dSS462eKqqiQ/nuSHd1rT3bd29yXHj717957CyAAAAADAqlYJgI/lC6Pf+busf1uSR7r7ztOeCgAAAABYi1UC4KEs7umXqtqfZMfrdavqnyd5WRIfAAIAAAAAXwRWCYD3JDlQVbcleW+S26vqYFUd3LpoGQdvS/LCJL+2/DTgF6x3XAAAAADgVOzZbUF3P15VB5Jcm+Su7j60w7qPZ7WgCAAAAAA8TXYNgEnS3UeT3P0UzwIAAAAArJkdewAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIMJgAAAAAAwmAAIAAAAAIOtFACr6uaqerCq7q2qfSdZ9y1V9VtV9etV9Yr1jQkAAAAAnI49uy2oqmuSXJHk8iRXJrkpyfecYN3zk9yW5GuTHEvy35O8fH2jAgAAAACnapUdgFcnubO7jyW5L4sYeCJXJPn17v50d/9Zks9U1QvXNCcAAAAAcBp23QGY5OIkjyRJd3dVXbTbuqXPJvnSbc8lSarquiTXbXnqyar6k1UGBk5dJXuTHNn0HJzD3l6bngAAgA3yOwkbd278TrLjbftWCYCPJdka/c5fcd2eJCf8r9vdtya5dYWvDaxBVR3u7ks2PQcAAHBu8jsJbNYqlwAfSnJVklTV/uxc7B9I8qpaeGYW9/97eB1DAgAAAACnp7r75AuqLkzywSTvz+JDQN6d5GiSdPcd29b+ZJLnZrH773Pd/Y/XPjFwyvxrGwAAsEl+J4HN2vUS4O5+vKoOJLk2yV3dfegka99YVd+Y5NlJ/tv6xgTOkEvuAQCATfI7CWzQrjsAAQAAAICz1yr3AAQAAAAAzlICIAxVVf+kqi5b4/leXFUH13U+AAAA4OkhAMJAVfVtSb4hycfWeNo/TPLVVfX6NZ4TAAAgSVJV51XVeZueAyYSAGFDquqGqnrL8u8/WFU/X1VdVS9dPndHVR1c/nnX8rlLq+rhXc77vCTfmeR7uvuJ5XP3V9Wlpzjf3qr6/ePv6+5jSf5Fkm+tqhecyrkAAACq6ueq6tXbnntJVb1v+fDbk7y5ql5WVTdW1Y9U1fuq6p897cPCMLt+CjDw1Kqqa5N8axY/7L4lyQ8l+afbln1bVX15ks+tcMpvT/Lj3f3kLl/3rUmu2vb0D275pO8fSfKlW1/s7q6qf5dFYHzHCrMAAABDVNX5Sf5fko+c4OUXJ3ldkqNJ7k7yySQvTPLK7v695Zonkjy+5XyXJflPSb6sqj6Q5PlJLliueV6Sf5vkU93950/JNwTnEAEQNusrk/xAklckOS/J7yX5+qr6m9vW/WoWYfBtK5zzpUl+5kQvVNWBLH6I/r3uvjHJjTuse02Si5M8cIKXP5zEv8ABAMC554kkn+zuV2x/oaruSPIXy+NXu/t1VXV/ks+f5HwPJflHSW7q7tcu7zn+/Czi4V/r7j9a7/hw7nIJMGzWa7P4Afmi5eMnkrwryfXb1v10kgNJLl3hnEeTPOsEz788ybuziH+f2enNVbUvyVuTfO8OS561/BoAAMC5pbc+qKrfrKoXb3nq2PY1Sf5uVX24qj6c5JuSvHv5+BeWtxl6YrnmY0neuTzHsW1f58J1fyNwrhEAYbNuSfKmJD+av/z/8Y4kVyb5si3r/mK5dnsYPJHfyOIH63ZvSfKZJJcki0uAl/cG3Hq8MotI+Jbu/uwO5/+mJB9aYQ4AAGCQ7t4e916U5NO7vO2Xk1ze3V+X5H9kca/yr8viNkjHvT/JtVnchihZXB31+qr6YFV9MslPnfHwcI4TAGGzHuvuX0nyx0m+MUm6+/NJbs1fvT/ff8zi8t7d/FySNyx38m31hiwuN/6xqqruvrG7r9x6JPmjJJcleftyu/5XJfnZqnpRklTVc5J8f5I7T/k7BQAAzmpVVdv+/rnufnSXtz15gnCYbfcsf0GSv5/ka5aPL85iY8J3JvmV7t5+j3TgFAmA8MXhzVnc3+/4D9SfSvKJrQuWYfDf7Hai5Q1y35zk7qr661teOtrdH03ysSTfscN7/7i7928Jgh9N8tru/sOqem6S/5rkrSfZHQgAAMy1tSF8RZI/2Pb6ecs/r1le8vu1JztZVb0qiw8M2Zvk1Un2J/nuJK9J8ifrGBhYqBOEeGCAqvrKJIe7+7E1ne9ZSV7S3b+9jvMBAABnl6p6RpJv7u73VdXtSX43yXOSXJjkF7LYxPDlSd605UNA3tjdH1++/2eT/Ifuvn/LOS9J8n1JfjjJO5LcnOQXs/jgwWNJru/ug0/LNwiD2QEIQ3X3Q+uKf8vzHRX/AADg3LX80I5frKofSvKyJO9J8mNJ/kYW9zW/MMmfZxEGk8VtiB7e6XxV9ewkP5PkI8tzfyrJv8/iE4AfimYBa+N/JgAAAOCkquqZVfWvkvxOFh9YeG13P7HcKPBdSf5Lkg8nebS7b06S7n5keSuj487LX14mnCx2D/5Ed9+9fPwTWVw2fN3y8QXLAzhDLgEGAAAAdlVVr0/yoe7+2A6vX9bdv3OGX+M567ySCVgQAAEAAABgMJcAAwAAAMBgAiAAAAAADCYAAgAAAMBgAiAAAAAADCYAAgAAAMBg/x+Y1V28JW//WQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 1600x640 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure(figsize=(20, 8), dpi=80)\n",
    "\n",
    "movie_name = ['KNN（k=4）','贝叶斯']\n",
    "first_day = [0.88,0.68]\n",
    "first_weekend = [0.89,0.91]\n",
    "\n",
    "# 先得到movie_name长度, 再得到下标组成列表\n",
    "x = range(len(movie_name))\n",
    "\n",
    "\n",
    "plt.bar(x, first_day, width=0.2)\n",
    "# 向右移动0.2, 柱状条宽度为0.2\n",
    "plt.bar([i + 0.2 for i in x], first_weekend, width=0.2)\n",
    "\n",
    "# 底部汉字移动到两个柱状条中间(本来汉字是在左边蓝色柱状条下面, 向右移动0.1)\n",
    "plt.xticks([i + 0.1 for i in x], movie_name)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2. sklearn库实现 准确率为： 0.89"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "准确率为： 0.89\n"
     ]
    }
   ],
   "source": [
    "from sklearn import metrics\n",
    "from sklearn.neighbors import KNeighborsClassifier \n",
    "knn = KNeighborsClassifier()\n",
    "knn.fit(train_features,train_labels)\n",
    "predicted_labels=knn.predict(test_features)\n",
    "print('准确率为：', metrics.accuracy_score(test_labels, predicted_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQYAAAD5CAYAAADFhptEAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAU0UlEQVR4nO3df7AddXnH8fcnl5CQH4TgjUg0QpFoLWMRiBgiZBLGVED80RQNU8SpaFMqo9VOp5ZCq3WUWupgp1SClwlopYOiU6IWJIDAGAkRg7bMYMX4A7Qo1WBI+DGQcM/TP3ZPc+75ce+ee8/mu+fcz2tmJ+fs2ew+ydzz3O8++91nFRGYmTWakToAM6seJwYza+HEYGYtnBjMrIUTg5m1cGIwsxZODANA0kckfSJ/fZyk30i6QdIDDdvcLemP8m1b1icI2yrMiWGASJoJfB64BHgIeJWkU9ps2mm9GeDEMGg+DPwiIjbk7/cAf9Jmu07rzQA4KHUA1jMnAyuBlzes+xKwVtJhTdt2Wm8GeMQwSFYDPwfWNqzbCdwMvLNp207rzQAnhkHyJeAC4C8kHdKw/mranzZ0Wm/mxDBAfhIRdwE/Bv64vjIi7gFGgRMaN+603gxcYxhEHwdGyK5O1G0Armqzbaf1VrI3rJ4bj/9mtNC29z/w3OaIOKPkkMZwYhgwEXGLpP8lO624Ll99PXB5m807rbeS7fzNKN/e/JJC28488sfDJYfTwolhAETER5ren9T0/klgfpu/13a9HQjBaNRSB9GRE4NZAgHUqG6TJCcGs0RqeMRgZg2CYLTCbRWdGMwS8amEWUVIOhw4CfheROxMFUcAoxVODJ7gVCGSjpC0JXUcRUhaIOnrkm6XdJOkg1PHNBFJR5JNBT8ZuEvSolSxBLAvaoWWFAY2MUjaKGmrpEtTx1KEpIXA54C5qWMp6DzgiohYAzwGHNAJOJN0HPDBiPg4sBk4MWUwtYJLCgOZGCStBYYiYgWwWNLS1DEVMAqsI7sluvIi4qqIuD1/uwj4Vcp4ioiIOyJim6SVZKOGe5PFQjBacElhUGsMq4Ab89d3AqcCO5JFU0BE7AGQlDqUruQNXxZGxLbUsRSh7D94HbCPLBmnETBa3RLDYI4YyIbjj+av9wBHJIxlYOWFvCvJpl/3hchcBGwFzk4WB9U+lRjUEcNTQP3W43kMbgJMJi823ghcHBGPpI6nCEkfAn4ZEf8KHAY8kTAaRqnu6HBQvzD3k50+ABwPPJwulIH1brLLfpfkDWXXpQ6ogBHgfEnfBIaA21IFEkAtii0pDOqIYROwRdJi4ExgedpwiouIValjKCLvK7lhwg0rJCJ2AWtSxwFZYthb4d/L1Y1sCvJC3ipgG7A6InanjcisVS1UaElhUEcM9d8ON064oVkC2czH6tYYBjYxmFVZIEYrPGCvbmQ9IGl96hi60W/xgmOeiiqfSgx0YgAq8QPQhX6LFxzzpNRPJYosKfhUwiwJMRrV/b2cPDEMHz4URy+ZWcq+X/rig1h2/OyeXwn+4QNzer1LAGYzh0N1eIUnyrZyzPs9ya6dEVHojs0A9jHU6xB6JnliOHrJTO7bvCR1GF15w0tOmngjm5pautsYJuuO+HLhGaAR1R4xVDcyswFXQ4WWIhp7eUiaKek/8rYDF3RaNx4nBrMEsuLjjELLRNr08ngfsD1vO3C2pPkd1nXkxGCWRHYqUWQBhiVtb1iar6o09/JYxf7JfVuBZR3WdZS8xmA2HWW3XRf+vbwzIjp+kdv08mjXdqCrVgRODGYJBGJvlHZVot52YDdZ24GnOqzryKcSZonUYkahZRLatR3oqhWBRwxmCdSLjyX5HHCLpNOA3wG+TXYa0byuI48YzBIIxGgUWwrvM+/lkXfUWgPcA7w+IkbbrRtvXx4xmCXSRfGxaxHxC5raDrRb14kTg1kCEVR65qMTg1kSxWc1puDEYJZAAHujul+/6kZmNsCCdE1YinBiMEukyq3dnBjMEsieK+HEYGZjVPtJVE4MZgl4xGBmbXnEYGZjRIh9tep+/Uoby0jamLeRurSsY5j1q6wfQ+9au/VaKYlB0lpgKG8jtVjS0jKOY9a/uurgdMCVddRV7L9Z40723wcOZE8Cqrep+vXj/dcN2GyqsuLj9HsS1bhtpCJiJCKWRcSyRS+obm99szL1qhlsGcqqftTbSEHWRqq612XMEqj6lOiyvrBdtZEym45qzCi0pFDWiGETsEXSYuBMYHlJxzHrSxGwr1bdgXQpkeXtrFcB24DVEbG7jOOY9avsVKK0ZrBTVtoMi4jYRcE2UmbTkWc+mtkY9cuVVeXEYJaEfBOVmbVyz0czGyPrEu3EYGYNAvF8rbqzfp0YzBLxqYSZjeGrEmbWlq9KmNlYCW+pLsKJwSyBegenqnJiMEvEIwYzGyOA53t0d6WkhcC/AfOBByPiQkkbgVcCt0TEx7rdZ3WrH2YDrN6opUet3c4Hro+I04D5kv6SKfZc9YjBLJEuagzDkrY3vB+JiJGG948Dr5B0GLAE2E1rz9Ud3cTmxGCWQnRVY9gZEcvG+fxbwBuB9wM/AGYxtufqsd2Glzwx7Pj+fM561empw+jK02v7qxv+vK98L3UIXYvaYHcP7/EEp8uACyNij6Q/Bz4OXJN/Nqmeq64xmCXSwxrDHOBVkoaA1wKfYIo9V5OPGMymo0CM9q7n498D1wFHAfcCn2KKPVedGMwS6dUEp4i4DziucZ2kVcAa4PLJ9Fx1YjBLILorPk5i/1PruerEYJZIeOajmY3lm6jMrA2PGMxsDDdqMbNWbgZrZs0Cn0qYWQsXH82sjYjUEXTmxGCWiE8lzGyMCCcGM2vDNQYza1GrOTGYWYNAPpUws1YVvijhxGCWRMWLj6W1dpN0hKQtZe3frO9FwSWBUkYM+QMwPgfMLWP/ZoNgOo4YRoF1ZK2rW0haL2m7pO17a8+WFIJZtWVzGSZeUihlxBARewCk9hkxf1jGCMCCmYuqXIMxK0UERO+awfaci49mifheCTNr5cRgZmNN4wlOEbGqzP2b9TWPGMxsjIpPcHJiMEvFIwYza+ERg5m1qPCIYdwZFpJmSGo7rTn/7O3lhGU24IJsxFBkSWCiEcPRwDmSvgMsbPpMwPlM4cGZZtNZP09wep7svoe/AbYARwArge8CO6j0YMis4ir87emYGCQdBHwMmA8cCdwMLAVeAdwH3AOcdABiNBtMFS4+TnQXxxZgb9N20fSnmXUrQLViS1GSrpL0pvz1RklbJV06mfA6jhgi4nlJtwELgEXAlcAhZKOHI4E/BH41mYOaWW8Li5JOA14UEV+TtBYYiogVebJYGhE7utnfRDWGJcB/RsQn2wQyg+z0wswmo/iYe1jS9ob3I3nrAgAkzQSuAW6R9BZgFfsvCtwJnEpWEyxsvBrDLOAS4FlJp7fZZAbwaDcHM7MGxRPDzohYNs7n7wS+D1wOvA+4CNiYf7YHOLbb0MY7lXgOOFPSMcBlwO8CHwAezzcRMKvbA5pZrndVuhPIRhGPSboeWEF22g8wj0l0aptw5mNE/AQ4V9I5wM8i4gfdHsTMmtQnOPXGj4Bj8tfLyOYfnQpsA44HHup2h4WnREfElwEkvS4i7slfz8pHFmbWJfVuxLARuFbSucBMshrDVyUtBs4Elne7wwmHGJJ2NBU+LsvX/z7w4W4PaGa5HrWPj4gnI+JtEbEyIk6JiEfIksM2YHVE7O42tCIjhocjYk3D+6clDQEXA2/s9oAtRmvE089MeTcH0tx/3z7xRhXy3O+dmDqErs36xn+lDqF7e7vbvIcjhhYRsYsp3K5QJDGEpOPI7pX4Yb7uHcBXIuLXkz2w2bRX4ZmP412unAmcQzYl+pXAaWQTm14D3A186gDEZzaYEj5lqojxagzDwBrg+bzwuDsi3g5sBw4D3l9+eGYDrMKPqOuYGCLilxFxAdkEp5OB2ZLOBhQRfw2cLemFBypQs0GjKLakUGTiQ5DVFj4LnAfUb+vYCJxbTlhm00A/jhgaHEV2l+Ue4KPAnHz9ZrLag5l1SSXcXdlLRWY+vqLxvaR/lHRBRFwr6c/KC81swFX4qsREPR9PyesK/y8ibgbOk3QY8JkSYzMbbBU+lZhoxDADGJL0APAs2Y1TQXZ68S7grnLDMxtcqQqLRUxUY6iH/jhZ74UngG8ADwAvB64vLTKzQdfHI4Y3Az+jNcyIiD8tMzCzgZbwUmQRHUcMeYemOWTJAbLTiMbP2z5vwswKqvCIYbwJTjXgi8CG+qqGPwVcLWm43PDMBleVL1cW7exyKNl93fOB1WTdnD4DXFhSXGaW0EQ1hiHg4OZ+c5LujIhv5V2dzGwyKlxjmCgx3ENTbSF3DUBEfKDXAZlNCxUvPo6bGCJitMP6G8oJx2wa6dfEYGYlcmIws0aij08lJkvSAuAL+f6fAtZFRJcd8cwGWKS7FFlE1w+iKOg84Iq8iexjwBklHcesf1V4glMpI4aIuKrh7SL88FuzVtPtVKJO0inAwojY1rR+PbAeYLZnVts0Ne1qDACSDgeuBP6g+bP8Sb0jAAtmvKDC/z1mJarwT35ZxceDyR52cXH+VBwza9TH7eOn4t3AScAlku6WtK6k45j1rSrfRFVW8XED++/KNLM2pmWNwcwm4MRgZmNUvMbgxGCWgGh/23JVlFV8NLOJ9HDmo6QjJH0vf71R0lZJl042NCcGs0R6/OzKTwKHSFoLDEXECmCxpKWTic2JwSyVWsEFhiVtb1jWN+5G0unA02T3Ja0im0MEcCdw6mRCc43BLIXuRgM7m9sr1uWTCf8WeCuwCZgLPJp/vAc4djLhOTGYpdKbqxJ/BXw6Ip6QBFmbg0Pyz+YxybMCJwazRHo0wen1wOmSLgJeDbwU+DmwDTgeeGgyO3ViMEulB4khIlbWX0u6m+wBUVskLSZ75MPyyezXxUezRHp8VYKIWBURe8gKkNuA1RGxezKxecRglkKJMx8jYhf7r0xMihODWQKi2j0fnRjMUvG9EmbWTFHdzODEYJaC7640s3bcqGUcEUFt777UYXRl6NB5qUPoysG3fid1CF0bOmpJ6hC693CX2zsxmFkzjxjMbKyKP6LOicEsFY8YzKzRtHzatZkV4HkMZtbMIwYzG8sTnMysHV+VMLMWTgxmNlbg4qOZtXLx0cxaOTGYWSNPcDKzVhGuMZhZK1+VMLMWPpUws7ECqFU3MzgxmKVS3bxQ3pOoJB0uaY2k4bKOYdbPev0kql4qJTFIOhK4GTgZuEvSojKOY9bX6lcmJloSKOtU4jjggxGxTdJC4ERgc0nHMutL0674GBF3AEhaSTZq+GgZxzHrVwrQdCw+ShKwDtgHjDZ9th5YDzCbOWWFYFZtFZ7HUFrxMTIXAVuBs5s+G4mIZRGxbCazygrBrNIUUWhJoazi44ckvTN/exjwRBnHMetb0cWSQFkjhhHgfEnfBIaA20o6jlmfKnhFosCIQdICSV+XdLukmyQdLGmjpK2SLp1MdKUkhojYFRFrImJlRLw3osJ3i5gl0sN5DOcBV0TEGuAx4FxgKCJWAIslLe02Ns98NEul+O/LYUnbG96PRMTI/t3EVQ2fLQLeAfxT/v5O4FRgRzehOTGYpRCg0cKJYWdELJtoI0mnAAvJHq/7aL56D3Bst+GVdlXCzCbQw+KjpMOBK4ELgKeAQ/KP5jGJ77kTg1kivbpcKelg4Ebg4oh4BLif7PQB4HiyEURXnBjMUundvRLvBk4CLpF0N1nnuPMlXQG8ney+pa64xmCWQtCzmY8RsQHY0LhO0leBNcDlEbG72306MZglIMqd1RgRu8hOLybFicEslQpP73FiMEshgOKXKw84JwazRFLdIFWEE4NZKk4MZjaWHzhjZs38tGsza6vCHZycGMwScfHRzMYKYLS6QwYnBrMkXHwc15Ps2nnH6BcfKWn3w8DOnu91V8/3WFdOvOUqJ+aHe77HRmX9Px/V1dZODJ1FRGlPqZK0vUiDi6rot3jBMU+JE4OZjeGnXZtZq4Bw8TGVkYk3qZSu4pU0ExiNyH7CJB1EdnV8bkQ82eHvHAPsym/LRdLsiHi2YX9ExL6yYq6I9DFX/KrEQHdwauyk2w8milfSafmzA74m6VGyzj1fkfS4pE3AJmAFcIekVZK+JOmzkr4o6YR8NxcAJzTsdpOklZKOBt4FXCvpaEkvyxPNlGKuosrEPA2fdm0liIgtkv4BOAO4NiJuAq6WtDki3lrfTtJZZN2CR4FLyJ47MCzpNrJHBtZHGC8DngNmA28DXgPMAs4h+9n4NNB25GE9UOHi40CPGAbUM8BrI+ImScsl3Qc8IulqSQ9IWg68LiJ+lG9/NdljAvcBe5v2dRnw38AdwFlkI4nfJnvW6Hc6nY5YL/TuSVRl8Iihj0g6j+wp4cqbft4K3ELWLvxe4CXAg8DHJP0g/2ujZM8WaN7X28g6CP80ImqS5gLn5x+/kWzEYWUJoOYag/XGDcAqsocE3wf8Il//IvIJO/lv+TcBPyXrFjwTeD5/3ehB4AMN7w8hezDJscALS4jdmnnEYL3QcPUB4GLgROAYYAnwM/Z/+d8CvJwsIRxKVieoJ4n6vr4vaU7D7o8E3pO/fhFwe1n/DstVuMbgxNCnImJU0jPAI8BKsiLivZJmAO8nKyC+GlgL/BZwDdkI8dS2O8xGHJvy1yeXFrhlIojR0dRRdORTiT6jbLgggIh4EJgPfAP4fP7ne8iuPDwJfBT4CPAscCHwEFlxsf4TKWCGpCFgN/CtfPlhfqyhA/FvmrZqUWxJwCOGPpI/imwrcEM+x+BfyL7c7yWrEdxIlghuJKsT/F1E/I+ky8hOFY4AvktWn4Ds0uQwWQHz1/nfrXsN2c/HF0r9R01nFT6VUFQ4OBufpBdHxKMN7+cAz0VEdceoBsCCoeE4Zd6bC227ec919x/om748YuhjjUkhf/9MqlhsEir8S9mJwSyRqPA8BicGsyTcwcnMmgVQ4cuVTgxmCQQQFW7U4nkMZilE3qilyFKApI2Stkq6tBfhOTGYJRK1KLRMRNJaYCgiVgCLJS2damyex2CWgKRbySaXFTGbbPZq3UhjsxlJ/wzcGhG3SDoHmB8R100lPtcYzBKIiDN6uLu5QH1Oyx6yO2SnxKcSZv3vKbIp8QDz6MH32onBrP/dz/67Zo+nB4/rcY3BrM9JOhTYQnZ37ZnA8ojYPaV9OjGY9T9JC4E1wDcj4rEp78+JwcyaucZgZi2cGMyshRODmbVwYjCzFk4MZtbi/wCDfVMfNUIbbwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 288x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "confusion_metrix(test_labels,predicted_labels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "以下为朴素贝叶斯算法训练上面处理好的数据，分为手写算法实现与sklearn库实现"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1.手写算法实现  准确率为： 0.68，平滑处理误差太大"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "def trainNB0(trainMatrix,trainCategory):\n",
    "    numTrainDocs = len(trainMatrix)                            #计算训练的文档数目\n",
    "    numWords = len(trainMatrix[0])                            #计算每篇文档的词条数\n",
    "    pAbusive0 = sumClass('女性')/float(numTrainDocs)        #文档属于女性的概率\n",
    "    pAbusive1 = sumClass('体育')/float(numTrainDocs)\n",
    "    pAbusive2 = sumClass('文学')/float(numTrainDocs)\n",
    "    pAbusive3 = sumClass('校园')/float(numTrainDocs)\n",
    "    p0Num = np.ones(numWords); p1Num = np.ones(numWords)    #创建numpy.ones数组,词条出现数初始化为1,初始化为1的原因就是做拉普拉斯平滑处理\n",
    "    p2Num = np.ones(numWords);p3Num = np.ones(numWords)\n",
    "    p0Denom = 4; p1Denom =4                           #分母初始化为4，拉普拉斯平滑处理\n",
    "    p2Denom =4; p3Denom =4\n",
    "    for i in range(numTrainDocs):\n",
    "        if trainCategory[i] == '女性':                            #统计条件概率所需的数据\n",
    "            p0Num += trainMatrix[i]\n",
    "            p0Denom += np.sum(trainMatrix[i])\n",
    "        elif trainCategory[i] == '体育':                                               \n",
    "            p1Num += trainMatrix[i]\n",
    "            p1Denom += np.sum(trainMatrix[i])\n",
    "        elif trainCategory[i] == '文学':\n",
    "            p2Num += trainMatrix[i]\n",
    "            p2Denom += np.sum(trainMatrix[i])\n",
    "        else:   #校园\n",
    "            p3Num += trainMatrix[i]\n",
    "            p3Denom += np.sum(trainMatrix[i])\n",
    "    p0Vect = np.log(p1Num/p1Denom)     #女性                                 \n",
    "    p1Vect = np.log(p0Num/p0Denom)     #体育\n",
    "    p2Vect = np.log(p2Num/p2Denom)     #文学\n",
    "    p3Vect = np.log(p3Num/p3Denom)     #校园\n",
    "    return p0Vect,p1Vect,p2Vect,p3Vect,pAbusive0,pAbusive1,pAbusive2,pAbusive3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# train=np.array(train_data)\n",
    "# p0Vect,p1Vect,p2Vect,p3Vect,pAbusive0,pAbusive1,pAbusive2,pAbusive3=trainNB0(train_data,train_labels)\n",
    "# p0Vect,p1Vect,p2Vect,p3Vect"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "#查询测试集中该label的个数\n",
    "def sumClass(label):\n",
    "    sum=0\n",
    "    for i in range(len(train_labels)):\n",
    "        if train_labels[i] == label:\n",
    "            sum=sum+1\n",
    "    return sum"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "#求最大后验\n",
    "def classifyNB(vec2Classify, p0Vect,p1Vect,p2Vect,p3Vect,pAbusive0,pAbusive1,pAbusive2,pAbusive3):\n",
    "    p0 = sum(vec2Classify*p0Vect)+np.log(pAbusive0)               #对应元素相加\n",
    "    p1 = sum(vec2Classify*p1Vect)+np.log(pAbusive1)\n",
    "    p2 = sum(vec2Classify*p2Vect)+np.log(pAbusive2)\n",
    "    p3 = sum(vec2Classify*p3Vect)+np.log(pAbusive3)\n",
    "   # print(sum(vec2Classify*p0Vect))\n",
    "    p=[p0,p1,p2,p3]\n",
    "    index=p.index(min(p))\n",
    "    return index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict(vec2Classify, p0Vect,p1Vect,p2Vect,p3Vect,pAbusive0,pAbusive1,pAbusive2,pAbusive3):\n",
    "    p0 = sum(vec2Classify*p0Vect)+np.log(pAbusive0)               #对应元素相加\n",
    "    p1 = sum(vec2Classify*p1Vect)+np.log(pAbusive1)\n",
    "    p2 = sum(vec2Classify*p2Vect)+np.log(pAbusive2)\n",
    "    p3 = sum(vec2Classify*p3Vect)+np.log(pAbusive3)\n",
    "   # print(sum(vec2Classify*p0Vect))\n",
    "    print('p0:',p0)\n",
    "    print('p1:',p1)\n",
    "    print('p2:',p2)\n",
    "    print('p3:',p3)\n",
    "    p=[p0,p1,p2,p3]\n",
    "    index=p.index(min(p))\n",
    "    #print(index)\n",
    "    return p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "#求最大后验\n",
    "def classifyNB2(vec2Classify, p0Vect,p1Vect,p2Vect,p3Vect,pAbusive0,pAbusive1,pAbusive2,pAbusive3):\n",
    "    p0 = sum(vec2Classify*p0Vect)+np.log(pAbusive0)               #对应元素相加\n",
    "    p1 = sum(vec2Classify*p1Vect)+np.log(pAbusive1)\n",
    "    p2 = sum(vec2Classify*p2Vect)+np.log(pAbusive2)\n",
    "    p3 = sum(vec2Classify*p3Vect)+np.log(pAbusive3)\n",
    "   # print(sum(vec2Classify*p0Vect))\n",
    "    print('p0:',p0)\n",
    "    print('p1:',p1)\n",
    "    print('p2:',p2)\n",
    "    print('p3:',p3)\n",
    "    p=[p0,p1,p2,p3]\n",
    "    index=p.index(min(p))\n",
    "    #print(index)\n",
    "    return index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'p0Vect' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-46-9090a5a4dac1>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpredict\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mclassifyNB2\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest_data\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mp0Vect\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mp1Vect\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mp2Vect\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mp3Vect\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mpAbusive0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mpAbusive1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mpAbusive2\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mpAbusive3\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrain_data\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'p0Vect' is not defined"
     ]
    }
   ],
   "source": [
    "predict=classifyNB2(test_data[i],p0Vect,p1Vect,p2Vect,p3Vect,pAbusive0,pAbusive1,pAbusive2,pAbusive3)\n",
    "np.sum(train_data[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "p0: -36.17565918345063\n",
      "p1: -31.311872027371344\n",
      "p2: -34.220396960788854\n",
      "p3: -30.910005826440077\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -30.68605414115486\n",
      "p1: -28.11323344517209\n",
      "p2: -28.739373656490546\n",
      "p3: -26.538047395930896\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -34.34680154748678\n",
      "p1: -33.09861173100824\n",
      "p2: -32.37557628533935\n",
      "p3: -29.77285032653704\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -41.31575442009705\n",
      "p1: -37.52541891904664\n",
      "p2: -38.426788851985606\n",
      "p3: -34.777758718381946\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -23.83375953706762\n",
      "p1: -24.0182031791697\n",
      "p2: -23.498245504228496\n",
      "p3: -21.584290251720724\n",
      "预测值：体育  真实值： 女性\n",
      "p0: -24.62537020166444\n",
      "p1: -22.97065130323541\n",
      "p2: -23.375442591190648\n",
      "p3: -21.66305736972828\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -28.42767494555505\n",
      "p1: -25.019352018721055\n",
      "p2: -26.59119455663882\n",
      "p3: -24.484969402646783\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -43.715002013119424\n",
      "p1: -37.17411027843383\n",
      "p2: -40.455323417364426\n",
      "p3: -37.00290102995738\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -42.134095884087714\n",
      "p1: -38.958225303683186\n",
      "p2: -39.45835508959163\n",
      "p3: -36.35089055117968\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -29.50815444819915\n",
      "p1: -25.6671745383004\n",
      "p2: -27.522822140049016\n",
      "p3: -25.903741077797648\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -38.1516417326965\n",
      "p1: -34.39308416299013\n",
      "p2: -35.71499114622233\n",
      "p3: -33.27162316760787\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -48.36646766350099\n",
      "p1: -45.889250252550454\n",
      "p2: -44.875262043369766\n",
      "p3: -41.83733384591037\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -43.75476237762517\n",
      "p1: -38.071577662607936\n",
      "p2: -41.16944207673498\n",
      "p3: -36.885823294779215\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -53.10018542104337\n",
      "p1: -46.437877247456946\n",
      "p2: -49.936430140379855\n",
      "p3: -45.02043588433372\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -47.42255911197533\n",
      "p1: -42.51740849816038\n",
      "p2: -44.23497278021304\n",
      "p3: -39.69119416612866\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -45.18553419335119\n",
      "p1: -42.549281899149314\n",
      "p2: -42.136881844217044\n",
      "p3: -38.41617043495056\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -44.45135338134436\n",
      "p1: -40.639639743002554\n",
      "p2: -41.48465351039313\n",
      "p3: -37.62199544630888\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -22.878092908644994\n",
      "p1: -19.695166795953995\n",
      "p2: -20.78331883190501\n",
      "p3: -20.308852063575102\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -33.26248341433486\n",
      "p1: -31.21856199153369\n",
      "p2: -31.21482423758171\n",
      "p3: -29.03463002034656\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -38.56207851918607\n",
      "p1: -32.49839604841018\n",
      "p2: -35.95379525846991\n",
      "p3: -32.726334857411544\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -37.821790789738664\n",
      "p1: -32.74317544885453\n",
      "p2: -35.43796910202281\n",
      "p3: -32.2261249943527\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -44.395000633175805\n",
      "p1: -38.05940748894655\n",
      "p2: -41.825300684318854\n",
      "p3: -37.98723173423992\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -42.78453639324603\n",
      "p1: -39.23312798447382\n",
      "p2: -40.39307563865248\n",
      "p3: -36.57142421247456\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -39.875590600300825\n",
      "p1: -33.78654246081627\n",
      "p2: -37.58739588829014\n",
      "p3: -33.786793798566194\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -30.69093800853673\n",
      "p1: -29.707453670466688\n",
      "p2: -29.754099011313105\n",
      "p3: -28.12859123419191\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -28.009887669306917\n",
      "p1: -25.391345040866515\n",
      "p2: -26.495492762291658\n",
      "p3: -24.44096520101845\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -37.71016023279728\n",
      "p1: -34.587664376670304\n",
      "p2: -35.70380109844789\n",
      "p3: -33.03064210802889\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -34.64842745669712\n",
      "p1: -31.814806215630057\n",
      "p2: -32.46874899554833\n",
      "p3: -29.807359324119936\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -36.74531286031327\n",
      "p1: -32.66729220566411\n",
      "p2: -34.7182779782836\n",
      "p3: -31.09926476255584\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -33.781925093758545\n",
      "p1: -29.551244320123633\n",
      "p2: -31.689939200865464\n",
      "p3: -28.86646289323327\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -28.557027794540925\n",
      "p1: -27.03857573613987\n",
      "p2: -27.038853129372484\n",
      "p3: -25.447321382692827\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -20.608429221298017\n",
      "p1: -19.222474223705625\n",
      "p2: -18.942826863670145\n",
      "p3: -18.112577720082616\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -34.48678031724142\n",
      "p1: -31.32032166302552\n",
      "p2: -32.36007096926233\n",
      "p3: -30.23278731521826\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -15.787223133984886\n",
      "p1: -14.633615574926756\n",
      "p2: -15.693004114498999\n",
      "p3: -15.192421812919427\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -35.257839759946\n",
      "p1: -32.86215665825811\n",
      "p2: -33.13453250895606\n",
      "p3: -31.317217109854603\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -21.55039363447027\n",
      "p1: -19.70411177030922\n",
      "p2: -19.912828983560342\n",
      "p3: -19.17895505686451\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -38.986868860250844\n",
      "p1: -36.95166641438495\n",
      "p2: -36.84354084156294\n",
      "p3: -34.28321253894234\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -51.89471334268947\n",
      "p1: -46.97712515886486\n",
      "p2: -48.482158575675015\n",
      "p3: -43.270345230616066\n",
      "预测值：女性  真实值： 女性\n",
      "p0: -18.891383681142464\n",
      "p1: -21.002312129118792\n",
      "p2: -21.26990649751208\n",
      "p3: -19.982825826138132\n",
      "预测值：文学  真实值： 体育\n",
      "p0: -20.342155970527305\n",
      "p1: -22.21353031193548\n",
      "p2: -21.6302475952049\n",
      "p3: -20.278940110577707\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -33.805704520181074\n",
      "p1: -34.31115493740033\n",
      "p2: -33.47826551916961\n",
      "p3: -30.793527100870648\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -33.62596081749076\n",
      "p1: -34.73302744125655\n",
      "p2: -33.80463958002102\n",
      "p3: -30.591580515381807\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -38.65033307685248\n",
      "p1: -39.39652705020174\n",
      "p2: -38.555877459625776\n",
      "p3: -34.80803933953448\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -25.95847428988291\n",
      "p1: -29.189050111362583\n",
      "p2: -27.721370789921917\n",
      "p3: -26.070186804759423\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -17.663401425430386\n",
      "p1: -19.623671795573593\n",
      "p2: -19.72021715338662\n",
      "p3: -18.250914847893494\n",
      "预测值：文学  真实值： 体育\n",
      "p0: -27.13071694101773\n",
      "p1: -28.268437871937643\n",
      "p2: -27.397994366523076\n",
      "p3: -25.29207791563624\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -23.486158946109303\n",
      "p1: -24.062549777507037\n",
      "p2: -23.708669385545353\n",
      "p3: -22.512548156158175\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -29.60403126511712\n",
      "p1: -32.18518982589823\n",
      "p2: -31.297601680511107\n",
      "p3: -28.665670479640788\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -24.192591122337557\n",
      "p1: -24.97443404574611\n",
      "p2: -24.12093723780459\n",
      "p3: -22.36434234692627\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -23.217212156983376\n",
      "p1: -25.116588349712043\n",
      "p2: -24.222159264915366\n",
      "p3: -22.75803783035643\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -23.147620717100722\n",
      "p1: -23.394788620808157\n",
      "p2: -23.057248105975997\n",
      "p3: -21.471250000229297\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -23.949752951624273\n",
      "p1: -24.283283314569374\n",
      "p2: -23.476125238279252\n",
      "p3: -21.97421526952622\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -21.070151800149638\n",
      "p1: -23.513479103076744\n",
      "p2: -22.50296488865191\n",
      "p3: -21.30022747491929\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -23.872221157432776\n",
      "p1: -27.101350092681816\n",
      "p2: -26.336954759496038\n",
      "p3: -24.311433517824405\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -35.89030617648947\n",
      "p1: -38.8576155954909\n",
      "p2: -37.657025123930474\n",
      "p3: -34.1279402528598\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -33.93816651863392\n",
      "p1: -36.15569018562575\n",
      "p2: -34.60228167228073\n",
      "p3: -32.047448385504886\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -29.15921224681648\n",
      "p1: -31.261064661433476\n",
      "p2: -30.296670337283647\n",
      "p3: -27.820523660489442\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -27.69949507287714\n",
      "p1: -29.50637106959834\n",
      "p2: -28.60104407068074\n",
      "p3: -26.34778561655893\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -44.92564210283177\n",
      "p1: -47.606001664934716\n",
      "p2: -45.77417934038429\n",
      "p3: -41.318332156766786\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -17.44328366785749\n",
      "p1: -19.13276257199416\n",
      "p2: -18.64959725915611\n",
      "p3: -17.687337952379668\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -17.713338166195005\n",
      "p1: -19.205488435870528\n",
      "p2: -18.95185251691797\n",
      "p3: -17.76289657798197\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -37.87364506842398\n",
      "p1: -39.40949502639702\n",
      "p2: -38.21608903079769\n",
      "p3: -34.422611587351646\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -25.851669806742965\n",
      "p1: -27.157222339141025\n",
      "p2: -26.478830430720365\n",
      "p3: -24.807516718350307\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -19.25880966427854\n",
      "p1: -19.80112902069434\n",
      "p2: -19.300654617639285\n",
      "p3: -18.399685520520087\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -9.8628903145391\n",
      "p1: -12.702645220947732\n",
      "p2: -12.776437893902104\n",
      "p3: -12.360037691774185\n",
      "预测值：文学  真实值： 体育\n",
      "p0: -19.0464549529544\n",
      "p1: -19.80504237699989\n",
      "p2: -18.892602497241377\n",
      "p3: -18.744741951417296\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -29.903626754927334\n",
      "p1: -31.942060060752265\n",
      "p2: -31.307900363334216\n",
      "p3: -28.621545406180132\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -30.92962250828654\n",
      "p1: -31.334187157033828\n",
      "p2: -30.848989914253753\n",
      "p3: -28.167312472949426\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -19.418560899286238\n",
      "p1: -22.23835146236972\n",
      "p2: -21.684017714906865\n",
      "p3: -20.23552350878508\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -30.884076034211056\n",
      "p1: -35.690683008062905\n",
      "p2: -34.27904087970311\n",
      "p3: -31.83885379514001\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -34.44317710437995\n",
      "p1: -36.97240718815158\n",
      "p2: -36.23929107577337\n",
      "p3: -32.56023435589004\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -33.76718847353833\n",
      "p1: -35.851386686230995\n",
      "p2: -34.430932622050406\n",
      "p3: -31.76679831440729\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -17.89349559088257\n",
      "p1: -19.077923169506754\n",
      "p2: -18.32308625817873\n",
      "p3: -17.87273336791103\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -19.347913036104057\n",
      "p1: -20.660038233040627\n",
      "p2: -20.481518659426637\n",
      "p3: -19.364231994508202\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -23.90975149235194\n",
      "p1: -24.938314417227936\n",
      "p2: -24.371328834384688\n",
      "p3: -22.846052254036167\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -42.132508190907195\n",
      "p1: -43.31777107886336\n",
      "p2: -42.59832157149696\n",
      "p3: -38.5505459240851\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -28.916941563732095\n",
      "p1: -28.22122736471187\n",
      "p2: -27.508852810704088\n",
      "p3: -25.229368341674295\n",
      "预测值：女性  真实值： 体育\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "p0: -37.84482406548318\n",
      "p1: -38.619026521921114\n",
      "p2: -37.76982771900751\n",
      "p3: -34.229936949209566\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -45.24190475733095\n",
      "p1: -47.86839370855754\n",
      "p2: -46.48491520183685\n",
      "p3: -41.998240405882086\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -34.14233855513102\n",
      "p1: -35.919388378076754\n",
      "p2: -34.44507960382461\n",
      "p3: -31.487268896597513\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -19.639830966191155\n",
      "p1: -21.110321833147502\n",
      "p2: -20.732450739084424\n",
      "p3: -19.27983816361605\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -19.24631252266685\n",
      "p1: -20.970152555956247\n",
      "p2: -20.628235112061656\n",
      "p3: -19.209600705298506\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -19.24631252266685\n",
      "p1: -20.970152555956247\n",
      "p2: -20.628235112061656\n",
      "p3: -19.209600705298506\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -19.24631252266685\n",
      "p1: -20.970152555956247\n",
      "p2: -20.628235112061656\n",
      "p3: -19.209600705298506\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -34.48773799476341\n",
      "p1: -35.73468644332047\n",
      "p2: -34.80461314145058\n",
      "p3: -31.886368354572525\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -28.086863898600146\n",
      "p1: -31.153154907868405\n",
      "p2: -30.33600096961298\n",
      "p3: -27.76905432971421\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -24.896757535281687\n",
      "p1: -27.858038047745215\n",
      "p2: -27.353206203805296\n",
      "p3: -25.160004424574424\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -47.34242751709562\n",
      "p1: -50.67128646519395\n",
      "p2: -49.49293619490309\n",
      "p3: -44.53426979725908\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -10.480579697414779\n",
      "p1: -12.655889842507387\n",
      "p2: -12.619128159973952\n",
      "p3: -12.321301238588083\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -43.24621773381389\n",
      "p1: -47.225950177987436\n",
      "p2: -45.72015867498354\n",
      "p3: -41.14399903053642\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -18.38110917064671\n",
      "p1: -19.219894783333213\n",
      "p2: -18.3255214669702\n",
      "p3: -17.65112696788288\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -18.38110917064671\n",
      "p1: -19.219894783333213\n",
      "p2: -18.3255214669702\n",
      "p3: -17.65112696788288\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -38.757800492545385\n",
      "p1: -41.85177782802683\n",
      "p2: -40.27029193766227\n",
      "p3: -36.76121047027153\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -30.42843327677629\n",
      "p1: -32.88849590823667\n",
      "p2: -31.172553786094458\n",
      "p3: -28.982697252652194\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -23.800875297540156\n",
      "p1: -24.69150625669008\n",
      "p2: -23.86682670334497\n",
      "p3: -22.013256887802093\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -38.271932352921716\n",
      "p1: -37.29652761457091\n",
      "p2: -37.25877893839396\n",
      "p3: -33.455625779320414\n",
      "预测值：女性  真实值： 体育\n",
      "p0: -41.572155531760146\n",
      "p1: -41.552881031762\n",
      "p2: -40.907895610386184\n",
      "p3: -36.690272118975976\n",
      "预测值：女性  真实值： 体育\n",
      "p0: -22.648232971267902\n",
      "p1: -25.303420952186194\n",
      "p2: -24.694697071815217\n",
      "p3: -22.70445742102092\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -35.135441739326104\n",
      "p1: -38.12852941673045\n",
      "p2: -36.50612866843129\n",
      "p3: -33.50795601223552\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -26.59235539785997\n",
      "p1: -29.190516711911847\n",
      "p2: -28.51907604767856\n",
      "p3: -26.017306280232432\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -21.2188758419049\n",
      "p1: -20.688126335234436\n",
      "p2: -20.487273371397904\n",
      "p3: -19.45224897949793\n",
      "预测值：女性  真实值： 体育\n",
      "p0: -16.664243611272475\n",
      "p1: -17.498895340394906\n",
      "p2: -17.376237709574852\n",
      "p3: -16.333692155032445\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -57.46220045374441\n",
      "p1: -63.032437805842605\n",
      "p2: -60.498514633169876\n",
      "p3: -54.60689920786062\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -40.47763620616197\n",
      "p1: -42.97685055756789\n",
      "p2: -41.59434670251313\n",
      "p3: -37.42653906446497\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -23.178005660438398\n",
      "p1: -23.00914134020061\n",
      "p2: -22.321269752646323\n",
      "p3: -21.442247394534405\n",
      "预测值：女性  真实值： 体育\n",
      "p0: -36.403055780320244\n",
      "p1: -37.2748367703932\n",
      "p2: -36.402086206661856\n",
      "p3: -32.67123831245198\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -22.826956957781103\n",
      "p1: -24.10097741784384\n",
      "p2: -23.513165418012534\n",
      "p3: -22.01241223141804\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -41.86458957686726\n",
      "p1: -44.8705720292108\n",
      "p2: -43.328680259690636\n",
      "p3: -39.2825696845287\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -17.87541437259319\n",
      "p1: -20.807957068357798\n",
      "p2: -20.601342668174055\n",
      "p3: -19.284374848988932\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -20.647852365911806\n",
      "p1: -22.086455211103818\n",
      "p2: -21.439105918934644\n",
      "p3: -20.439841334474856\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -12.301173568336871\n",
      "p1: -13.496516612740368\n",
      "p2: -13.53627067463825\n",
      "p3: -13.180283707816178\n",
      "预测值：文学  真实值： 体育\n",
      "p0: -26.669979067206985\n",
      "p1: -28.341300974795672\n",
      "p2: -27.77724485737201\n",
      "p3: -25.299720882772007\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -24.35908757091376\n",
      "p1: -27.65888727001996\n",
      "p2: -26.628864836217478\n",
      "p3: -24.60886149761871\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -29.064972120053667\n",
      "p1: -32.50268035038588\n",
      "p2: -30.905510748687057\n",
      "p3: -28.36420373244212\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -21.307121261302033\n",
      "p1: -23.928236474199835\n",
      "p2: -22.954391062052547\n",
      "p3: -21.288422914163995\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -30.025366613016146\n",
      "p1: -33.36085590558598\n",
      "p2: -31.918903499843182\n",
      "p3: -29.236619436324077\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -32.6540819043164\n",
      "p1: -33.752261507372836\n",
      "p2: -31.089909581414325\n",
      "p3: -29.826969041860202\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -30.729965203360155\n",
      "p1: -32.318895235724234\n",
      "p2: -31.450391082561737\n",
      "p3: -28.620103266358875\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -27.41337979243351\n",
      "p1: -29.54658790824542\n",
      "p2: -29.09397590330203\n",
      "p3: -26.66506911264961\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -33.99720463611931\n",
      "p1: -36.18383558572057\n",
      "p2: -35.37637182987972\n",
      "p3: -32.141456986242005\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -35.54018069230591\n",
      "p1: -37.42058889798913\n",
      "p2: -36.61086493668246\n",
      "p3: -33.24515646570879\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -37.406212516955506\n",
      "p1: -38.80046745962054\n",
      "p2: -35.96347997939204\n",
      "p3: -34.200861721511714\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -36.858980579458645\n",
      "p1: -36.90099049575605\n",
      "p2: -36.066681450553695\n",
      "p3: -32.74583652234985\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -11.732934997322252\n",
      "p1: -12.081773994718239\n",
      "p2: -12.2942284821385\n",
      "p3: -12.264509446624318\n",
      "预测值：文学  真实值： 体育\n",
      "p0: -16.528830424935816\n",
      "p1: -16.637822184476676\n",
      "p2: -16.66004870710129\n",
      "p3: -15.932992187939673\n",
      "预测值：文学  真实值： 体育\n",
      "p0: -21.858876755018517\n",
      "p1: -23.630911040803582\n",
      "p2: -23.02576894540251\n",
      "p3: -21.750130771650397\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -17.94465435675789\n",
      "p1: -20.446091816572313\n",
      "p2: -19.68229432297784\n",
      "p3: -18.92678063977543\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -38.03583767750365\n",
      "p1: -40.76686348658249\n",
      "p2: -39.322554740004\n",
      "p3: -35.54610496422316\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -33.00578754383293\n",
      "p1: -35.46544649867224\n",
      "p2: -34.05030023549637\n",
      "p3: -31.275497637172755\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -35.133759454919286\n",
      "p1: -37.836671161731324\n",
      "p2: -37.009103518189484\n",
      "p3: -33.81801299825174\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -28.03617496601188\n",
      "p1: -30.083347586956208\n",
      "p2: -29.148507994866623\n",
      "p3: -27.478947013584822\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -34.20247022373014\n",
      "p1: -36.03524924526768\n",
      "p2: -35.405411871188065\n",
      "p3: -31.984139340522745\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -22.1138239699879\n",
      "p1: -24.227129454305008\n",
      "p2: -23.879250458974344\n",
      "p3: -21.769470033847597\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -29.932752738998456\n",
      "p1: -31.49818902585704\n",
      "p2: -30.330170779934996\n",
      "p3: -27.905698514255334\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -36.543135636348815\n",
      "p1: -38.44126088337471\n",
      "p2: -37.45333564932742\n",
      "p3: -33.79803666280421\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -31.5964266157714\n",
      "p1: -33.808745610378\n",
      "p2: -33.02920197182289\n",
      "p3: -30.197865784486183\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -36.08804579958198\n",
      "p1: -38.70139225617215\n",
      "p2: -37.94412694617282\n",
      "p3: -34.08784883068273\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -19.583894342546444\n",
      "p1: -21.1471362707549\n",
      "p2: -21.177992506415602\n",
      "p3: -19.77132686347167\n",
      "预测值：文学  真实值： 体育\n",
      "p0: -38.12973095269864\n",
      "p1: -38.77641204842834\n",
      "p2: -37.33155673667525\n",
      "p3: -34.5714149605108\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -22.597118889769376\n",
      "p1: -22.790063268931902\n",
      "p2: -22.11921146284058\n",
      "p3: -20.80774093307017\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -28.240975780387505\n",
      "p1: -28.754674782200112\n",
      "p2: -27.867245697357152\n",
      "p3: -25.53825878251501\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -31.058315248555825\n",
      "p1: -29.96395701868945\n",
      "p2: -30.047318548307477\n",
      "p3: -27.6116514412289\n",
      "预测值：女性  真实值： 体育\n",
      "p0: -37.238185138510126\n",
      "p1: -38.38291973435572\n",
      "p2: -36.92953716726401\n",
      "p3: -33.933375679027506\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -40.272377743322075\n",
      "p1: -39.513465533481764\n",
      "p2: -38.26820958933339\n",
      "p3: -35.27983177859885\n",
      "预测值：女性  真实值： 体育\n",
      "p0: -19.738413661217646\n",
      "p1: -19.85788950727013\n",
      "p2: -19.326806407234727\n",
      "p3: -18.15010331010025\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -28.432026568118445\n",
      "p1: -27.56194407196273\n",
      "p2: -27.080689584773904\n",
      "p3: -25.153496402524254\n",
      "预测值：女性  真实值： 体育\n",
      "p0: -35.3201551982133\n",
      "p1: -34.474984187895124\n",
      "p2: -34.10002598475398\n",
      "p3: -30.953774213616164\n",
      "预测值：女性  真实值： 体育\n",
      "p0: -20.868111107594377\n",
      "p1: -19.992644830805496\n",
      "p2: -20.040544754281783\n",
      "p3: -18.86213145792054\n",
      "预测值：女性  真实值： 体育\n",
      "p0: -31.591184015191107\n",
      "p1: -31.363828978427073\n",
      "p2: -30.4352719966544\n",
      "p3: -28.076939064401778\n",
      "预测值：女性  真实值： 体育\n",
      "p0: -38.71800229275703\n",
      "p1: -42.66449458484717\n",
      "p2: -40.69373147851527\n",
      "p3: -37.68111379205621\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -41.40884782418732\n",
      "p1: -43.102075158354346\n",
      "p2: -41.86081799430446\n",
      "p3: -37.7009385923586\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -26.38257973755964\n",
      "p1: -29.48034816629798\n",
      "p2: -28.520181334065743\n",
      "p3: -26.391580003200282\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -30.637796029625395\n",
      "p1: -33.54136014720111\n",
      "p2: -32.796654993468486\n",
      "p3: -29.7191421868583\n",
      "预测值：体育  真实值： 体育\n",
      "p0: -33.64194005333306\n",
      "p1: -32.485956993385635\n",
      "p2: -31.157533468552952\n",
      "p3: -29.641444388044683\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -20.24156639103887\n",
      "p1: -20.240152857708775\n",
      "p2: -18.90848002849882\n",
      "p3: -18.87416020842476\n",
      "预测值：女性  真实值： 文学\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "p0: -11.588093693258685\n",
      "p1: -11.902954088389555\n",
      "p2: -7.734441721834119\n",
      "p3: -11.33957384438986\n",
      "预测值：体育  真实值： 文学\n",
      "p0: -25.056785749885325\n",
      "p1: -24.09199124452578\n",
      "p2: -22.625520465467687\n",
      "p3: -21.929964313027135\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -14.11603328722106\n",
      "p1: -13.928549412471197\n",
      "p2: -11.215126051246905\n",
      "p3: -13.179790869803206\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -26.836064793209975\n",
      "p1: -26.821288430085772\n",
      "p2: -24.396113347775245\n",
      "p3: -24.982723858483638\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -34.29067181622118\n",
      "p1: -35.522095466415166\n",
      "p2: -32.396268711528776\n",
      "p3: -31.66178339922788\n",
      "预测值：体育  真实值： 文学\n",
      "p0: -28.19526969340526\n",
      "p1: -27.775411474442947\n",
      "p2: -25.717048716771167\n",
      "p3: -25.274126605285204\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -11.588093693258685\n",
      "p1: -11.902954088389555\n",
      "p2: -7.734441721834119\n",
      "p3: -11.33957384438986\n",
      "预测值：体育  真实值： 文学\n",
      "p0: -19.714787406710396\n",
      "p1: -18.786403067556048\n",
      "p2: -17.91924037374673\n",
      "p3: -18.298416739593648\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -15.616895102142049\n",
      "p1: -15.368770079012194\n",
      "p2: -14.733955784254812\n",
      "p3: -14.79081093155931\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -21.466323591904327\n",
      "p1: -20.687496779094023\n",
      "p2: -19.96056507695159\n",
      "p3: -19.2926420414812\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -13.45337064419587\n",
      "p1: -13.33039726116989\n",
      "p2: -12.338752105873857\n",
      "p3: -13.43573264588595\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -13.45337064419587\n",
      "p1: -13.33039726116989\n",
      "p2: -12.338752105873857\n",
      "p3: -13.43573264588595\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -39.14401910634841\n",
      "p1: -38.6319546580469\n",
      "p2: -36.25265206441446\n",
      "p3: -34.114466692813906\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -37.74220647292681\n",
      "p1: -37.57173136898112\n",
      "p2: -34.95152059565503\n",
      "p3: -32.97697035889524\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -29.984839470999663\n",
      "p1: -30.168982035212785\n",
      "p2: -28.224097689974744\n",
      "p3: -26.876562814470383\n",
      "预测值：体育  真实值： 文学\n",
      "p0: -28.774134112817407\n",
      "p1: -27.967030192436\n",
      "p2: -25.92063249843861\n",
      "p3: -25.701980363870632\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -26.063614576529293\n",
      "p1: -25.643395270472553\n",
      "p2: -23.71673634999038\n",
      "p3: -23.74027828464188\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -32.91084571626244\n",
      "p1: -31.935355884673683\n",
      "p2: -30.641761464409953\n",
      "p3: -29.390629771447685\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -29.287342889351535\n",
      "p1: -28.756535783593236\n",
      "p2: -25.65535549346499\n",
      "p3: -25.926397385054486\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -23.856473779574042\n",
      "p1: -23.05314761943885\n",
      "p2: -20.138465971832623\n",
      "p3: -20.907390422788502\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -29.17314395674048\n",
      "p1: -28.114474213638434\n",
      "p2: -26.56734487078522\n",
      "p3: -24.98315006643675\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -36.677918099891805\n",
      "p1: -36.615588121854024\n",
      "p2: -34.53584012839224\n",
      "p3: -32.47827525032052\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -36.68445567986344\n",
      "p1: -36.45772179326108\n",
      "p2: -34.88685717579443\n",
      "p3: -32.63886985631124\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -40.72366429499042\n",
      "p1: -39.783155574574685\n",
      "p2: -37.953292645423225\n",
      "p3: -35.504847011861706\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -37.444259078030534\n",
      "p1: -36.39393797294366\n",
      "p2: -35.27238771977206\n",
      "p3: -33.00894408602334\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -36.26848577274378\n",
      "p1: -36.17484890833264\n",
      "p2: -34.583088311054055\n",
      "p3: -32.06148970833495\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -31.61403258870383\n",
      "p1: -30.941409485833535\n",
      "p2: -29.109828123431615\n",
      "p3: -27.741390518984424\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -27.054644508190876\n",
      "p1: -26.183784121083896\n",
      "p2: -24.354181996396896\n",
      "p3: -23.907644298871816\n",
      "预测值：女性  真实值： 文学\n",
      "p0: -13.916849240795505\n",
      "p1: -14.145152095442597\n",
      "p2: -10.512161354733367\n",
      "p3: -13.370188557198864\n",
      "预测值：体育  真实值： 文学\n",
      "p0: -31.784961762038368\n",
      "p1: -31.021214499656317\n",
      "p2: -30.915779252076188\n",
      "p3: -27.720290025059853\n",
      "预测值：女性  真实值： 校园\n",
      "p0: -35.933569545750665\n",
      "p1: -35.77942757659296\n",
      "p2: -34.56052448941242\n",
      "p3: -30.365470664361958\n",
      "预测值：女性  真实值： 校园\n",
      "p0: -22.188581245210077\n",
      "p1: -21.300032351246255\n",
      "p2: -20.88002135910512\n",
      "p3: -19.788705282329296\n",
      "预测值：女性  真实值： 校园\n",
      "p0: -27.962705021436882\n",
      "p1: -27.014918298027485\n",
      "p2: -26.365616090098303\n",
      "p3: -23.052550324751387\n",
      "预测值：女性  真实值： 校园\n",
      "p0: -16.784487944074737\n",
      "p1: -17.05879214417558\n",
      "p2: -16.27877980229038\n",
      "p3: -15.143420091244094\n",
      "预测值：体育  真实值： 校园\n",
      "p0: -21.32684461837147\n",
      "p1: -21.491462948958258\n",
      "p2: -20.17440135980137\n",
      "p3: -17.50315826398948\n",
      "预测值：体育  真实值： 校园\n",
      "p0: -38.05425813727752\n",
      "p1: -37.720527374329066\n",
      "p2: -36.502614550216784\n",
      "p3: -33.52374752039503\n",
      "预测值：女性  真实值： 校园\n",
      "p0: -21.149105341858917\n",
      "p1: -20.90089929374829\n",
      "p2: -20.336525292325597\n",
      "p3: -17.855455716922165\n",
      "预测值：女性  真实值： 校园\n",
      "p0: -27.836290135805843\n",
      "p1: -27.83668223279281\n",
      "p2: -27.293228755580355\n",
      "p3: -23.237321215554715\n",
      "预测值：体育  真实值： 校园\n",
      "p0: -37.41261739986316\n",
      "p1: -36.981385514293144\n",
      "p2: -35.813195554483066\n",
      "p3: -31.040939151104794\n",
      "预测值：女性  真实值： 校园\n",
      "p0: -25.984022079462136\n",
      "p1: -25.069747051237172\n",
      "p2: -24.737056663984212\n",
      "p3: -22.658220543672854\n",
      "预测值：女性  真实值： 校园\n",
      "p0: -31.30529017357231\n",
      "p1: -30.021323991686256\n",
      "p2: -29.38215186778909\n",
      "p3: -26.287042093110742\n",
      "预测值：女性  真实值： 校园\n",
      "p0: -11.910609601121799\n",
      "p1: -11.65177442289118\n",
      "p2: -11.721775986598013\n",
      "p3: -11.472430583658856\n",
      "预测值：女性  真实值： 校园\n",
      "p0: -10.17455776390741\n",
      "p1: -11.039300193199702\n",
      "p2: -11.003752933753704\n",
      "p3: -11.52627649012607\n",
      "预测值：校园  真实值： 校园\n",
      "p0: -10.17455776390741\n",
      "p1: -11.039300193199702\n",
      "p2: -11.003752933753704\n",
      "p3: -11.52627649012607\n",
      "预测值：校园  真实值： 校园\n",
      "p0: -19.615805121873045\n",
      "p1: -19.179175425767653\n",
      "p2: -19.294392948095616\n",
      "p3: -16.217045082552794\n",
      "预测值：女性  真实值： 校园\n",
      "------ 0.68\n"
     ]
    }
   ],
   "source": [
    "preB=[]\n",
    "pre_labels='x'\n",
    "for i in range(1):\n",
    "    p0Vect,p1Vect,p2Vect,p3Vect,pAbusive0,pAbusive1,pAbusive2,pAbusive3=trainNB0(train_data,train_labels)\n",
    "    p0Vect,p1Vect,p2Vect,p3Vect\n",
    "    successNum=0  #计数\n",
    "    for i in range(len(test_labels)):\n",
    "        predict=classifyNB(test_data[i],p0Vect,p1Vect,p2Vect,p3Vect,pAbusive0,pAbusive1,pAbusive2,pAbusive3)\n",
    "        if predict==0:\n",
    "            print('预测值：女性  真实值：',test_labels[i])\n",
    "            pre_labels='女性'\n",
    "           # print(test_labels[i]=='女性')\n",
    "        if predict==1:\n",
    "            print('预测值：体育  真实值：',test_labels[i])\n",
    "            pre_labels='体育'\n",
    "        if predict==2:\n",
    "            print('预测值：文学  真实值：',test_labels[i])\n",
    "            pre_labels='文学'\n",
    "        if predict==3:\n",
    "            print('预测值：校园  真实值：',test_labels[i])\n",
    "            pre_labels='校园'\n",
    "        preB.append(pre_labels)\n",
    "        if pre_labels==test_labels[i]:\n",
    "            successNum+=1\n",
    "    print('------',successNum/len(test_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "#score=np.zerosvcvcvcvvvvvvvvcvcvcvcvvvvvvvvvvvvvcvcvcvcvcvcvvcvcvcvcvcvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n",
    "p0Vect,p1Vect,p2Vect,p3Vect,pAbusive0,pAbusive1,pAbusive2,pAbusive3=trainNB0(train_data,train_labels)\n",
    "for i in range(len(test_labels)):\n",
    "    predict=classifyNB(test_data[i],p0Vect,p1Vect,p2Vect,p3Vect,pAbusive0,pAbusive1,pAbusive2,pAbusive3)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.68\n"
     ]
    }
   ],
   "source": [
    "print(successNum/len(test_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQEAAAD5CAYAAAAnWoA9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAUg0lEQVR4nO3de5BedX3H8fcnm0CAQAiGSyJUysVLnRaVqIBAg2NGULwUQRwRW9BmrLRW206VQluklrbqoC0quE68lRk02oI6KjeREgSKoJZ6QdACWoTRIBIuFZLdT/84Z2FZkn3OQ56z5zzP+bxmzuS5nHOe78I+3/3df7JNRHTXvKYDiIhmJQlEdFySQETHJQlEdFySQETHJQlEdFySwBCRdIakByTdJel2SX/QdEwx/JIEhs+HbC8DXgS8T9LSpgOK4ZYkMKRs3wncCezbdCwx3JIEhpSk3wL2BG6VdJKkWyT9VNIflu+vlvTJaedfKOm48vETzi9ff7ekn5XVjbfO8Y8UDVGGDQ8PSWcAfwE8COwGnAH8G3ABcCgwH7gJeB5g4L+B5cAC4CfA3sA+Wzh/I/AzYPfy9XHbr5mLnyuaNb/pAKJvH7L9Lkn7AleWr+0D/LB8vB3wDNtXSfoBcBiwPfAfth+SdMTmzge+AdwK/DNwMfD7c/HDRPNSHRhStn8MXFs+/bTtPWzvQVFFuK58/fPAq4FXAp8rX9Pmzrc9ATy/vOZ3gW9L2mZOfphoVEoCQ0rSHsBBwNXASeXzB4H/Ao4BvgP8e/n+JuDPy0uvAP5s5vmSHgI+AbwEuIqi0fEpwF1z9CN1ykuP2MH3/HKi0rk33vTwJbaPrCuWJIHh88eSTgImgfOBcyi+zNdS/P/8oO3vANj+maS7gLtsP1i+9l1Jf7e58yWtA24rP+dDtpMAarL+lxP85yV7Vjp3wbIf19oNnIbBESZpPnAucLntzzYdTzzmwAO29TUXP7XSuQuX33aj7RV1xZI2gdF2N/BM4KKG44gZDEziSkfdUh0YYbYzmrDFJplsOgQgSSCiEcZMtKQqniQQ0ZC5KOpXkSQQnSJpF+BA4Nu21zcVh4GJliSBNAy2iKTdy2661pO0WNJXJV1Wzkto/cAiScuALwMvAL4uademYjGw0ZOVjrqNbBKQtEbSNZJObzqWKiQtAT4F7NB0LBWdAJxtexVFL0Rtg1kG6NnAO2z/PXAJxZyJxkxWPOo2kklA0jHAmO1DgOWS9m86pgomgOOBDU0HUoXtj9i+rHy6K/DzJuOpwvbltq+TdDhFaeDaXtfUFgtmouJRt1FtE1gJrC0fX0ExY+7WxqKpwPYGAElNh9IXSQcDS2xf1/PkFlDxH/h4ilmT1cbt1sEw0Y4mgdEsCVAUqe8sH2+gmB4bA1Y2sp0DnNx0LFW5cApwDXB0Y3HQnurAqJYEHqCYIguwiNFNdo0pGwLXAqfavqPpeKqQ9E6KeRSfBnYGftVgNEzQjlLfqH45bqSoAgAcANzeXCgj600UXW2nSbpS0vFNB1TBOHCipKuAMeDSpgIxMOlqR91GtSRwEbBO0nLgKIopt0PB9sqmY6jC9rkUk5OGhu17gVVNxwFFEnikJX+D2xHFgJWNbCspFtc4wvZ9zUYU8USTVqWjbqNaEpjK+mt7nhjRgGLEYDvaBEY2CUS0mRETLSmItyOKmkha3XQM/Ri2eCExb422VAdGOgkArfif3YdhixcS85MyVR2octQt1YGIRogJt+NvcONJYOkuY957rwW13Ps3njqfFQcsHHhP6623LBn0LQFYOH8nFm+3bPA9wxP1jY5dOLaIxdvsNvCYvXHToG/5qIVsz07aZeAx38+9621XmploYCNjgw7hSWk8Cey91wKuv2SvpsPoy8uOOLbpEPqiDQ80HULfNt11d9Mh9O1yf77yyEk7JYGIzptMF2FEdxUNgykJRHRYqgMRnVZMJU4SiOgsIx5xegciOm0y1YGI7krDYETHGTExB/MCqkgSiGhIGgYjOswmXYQR3aaMGIzoMgOPuB1fv3ZEEdExZm4WDKmiHZWSiA6aYF6loxdJSyR9RdI6SeeVr1XeizNJIKIBxb4D8yodFZwInG/7MGBHSX9JH3txpjoQ0Yi+lg5bKumGac/HbY9Pe34P8AxJOwN7AffRx16cSQIRDZgqCVS03vaKWd6/Gng58DbgZmBbHr8X536z3TzVgYiGDHCh0bOAt9g+kyIJvJ4+9uJMEohogC02Ts6vdFSwPfDbksaAFwL/SB97cdZWHZC0BngW8BXb76nrcyKGUbGewMC6CP8B+ATwNOBa4AP0sRdnLSUBScfQR+tkRPcUKwtVOXqxfb3tZ9teZHtVv3tx1lUSWMksrZPlDjCroVgWPKJriobB+gYL9bMXZ11tAjvw+NbJ3ae/aXvc9grbK3Z9SjtWV4mYa4MaLLS16voz/AB9tE5GdE0Xhg3fSB+tkxFdNMm8Skfd6ioJXEQfrZMRXWPDxsl2FJBriaLf1smIrimqAwObO7BVamua76d1MqKL5mLb8SrSPxfRgLq7CPuRJBDRCGXfgYiuyxqDER1WrDacJBDRWUZsmmzHaNkkgYiGpDoQ0WHpHYiI9A5EdJrbM4EoSSCiAQNeWWirJAlENCQlgYgOM7CpJbMIkwQiGtCmRUWSBCIakjaBiC5z2gQedctN2/PSPQ9sOoy+3PX23ZoOoS/Lzv5R0yHEDBksFBFJAhFdZsREegciui0NgxEd5jQMRoSTBCK6LIOFIjovJYGIDss4gYiuy0KjEd1mUh2I6Lg0DEZ0nt10BIUkgYiGpDoQ0WF2kkBE57WlTaAd05giOmhyUpWOqiR9RNIrysdrJF0j6fRe1yUJRDTACLvaUYWkw4A9bH9J0jHAmO1DgOWS9p/t2iSBiIa44gEslXTDtGP19PtIWgB8DLhd0quAlcDa8u0rgENniyNtAhFN6K9hcL3tFbO8/0bg+8B7gT8BTgHWlO9tAPab7ea1lQQk7S5pXV33jxh6fRQFenguMG77buB84Cpgu/K9RfT4nteSBCQtAT4F7FDH/SNGwQDbBH4E7FM+XgHszWNVgAOA22e7uK7qwARwPPCFzb1Z1mlWAyxk+5pCiGi3AY4YXAN8XNLrgAUUbQJflLQcOAo4aLaLa0kCtjcASJvPYrbHgXGAnbRLSwZPRswdGzyghUZt3w8cN/01SSuBVcB7bd832/VpGIxoSJ1zB2zfy2M9BLNKEohoSkvKwEkCEY2oPhCobrUmAdsr67x/xFBLSSCiwzKLMCJSEojoupQEIjquJSWBWUcrSJonabNDf8v3XltPWBEjzhQlgSpHzXqVBPYGjpX0TWDJjPcEnEjFAQkR8XjDstDoJop5AH8NrAN2Bw4HvgXcSmsKNBFDqCXfni0mAUnzgfcAOwLLgC8D+wPPAK4HvgEcOAcxRoymljQM9prBsA54ZMZ5nvFvRPTLoMlqR922WBKwvUnSpcBiYFfgHIqFCpaVx+uBn9cfYsQomptGvyp6tQnsBXzH9vtnviFpHkUVISKejJaUpWdrE9gWOA34taQXb+aUecCddQUWMfLangRsPwwcJWkf4Czgd4C3A/eUpwjYtu4AI0ZW25PAFNv/A7xO0rHAT2zfXH9YESNuarBQC1Re38j2523fLOlFU6+VVYaIeBLkakfdeiYBSbdKumHaS2eVr/8e8Ld1BRYx8ga35PhWqTKB6Hbbq6Y9f1DSGHAq8PKtDUDzxxjbefHW3mZO7fjTiaZDiBEwF3/lq6iSBCzp2RRzB24pX3sD8AXbv6gtsohR15I2gdm6CBcAx1IMG34WcBjFIKHnA1cCH5iD+CJG0xwV9auYrU1gKcW65Ztsfx64z/ZrgRuAnYG31R9exAhrSZvAFpOA7btsn0wxWOgFwEJJRwOy/VfA0ZJ2qz/EiNE0NL0DFLnoFuCTwAnA1JSGNcDr6gkrogPaXhKY5mkUswk3AGfCo5sHXkLRVhARfdIwzCKcYvsZ059Lep+kk21/XNKf1hdaxIhrSe9ArzUGDy7bAR5l+8vACZJ2Bj5aY2wRo60l1YFeJYF5wJikm4BfU0waMkUV4STg6/WGFzG62jJYqFebwFSY91CsHfAr4GvATcDTgfNriyxi1A1JSeCVwE94Yki2/Ud1BhYx0uao+6+KLZYEypWDtqdIBFBUBaa/v9n9CCKiopaUBGYbLDQJfBY4d+qlaf8KOE/S0nrDixhdbekirLqewE7AURTzCI6gWGXoo8BbaoorIuZIrzaBMWAb2yumvyjpCttXl6sNRcST0ZI2gV5J4BvMaAsofQzA9tsHHVBEJ7SoYXDWJGB7s6tn2L6gnnAiOqQlSaDyGoMRMWAD7B2QtLukb5eP10i6RtLpVa5NEohogBj4VOL3A9tJOgYYs30IsFzS/r0urLK8WN8kLQY+U97/AeB424/U8VkRQ8l9df8tnbHY77jt8akn5eZADwJ3AyuBteVbVwCHUuwgvkW1JAGKdQfOtn2ZpHOBI4Ev1vRZEcOp+l/59TN76KZI2gb4G+DVwEXADjy2M9gGYL9eN68lCdj+yLSnu5KNSyOeaDANg+8CPmz7V5KgKHlvV763iApV/rpKAkAxFRlYYvu6Ga+vBlYDLJy3qM4QIlprQF2ELwFeLOkU4DnAbwA/Ba4DDgB+2OsGtSUBSbtQbGf+mpnvlfWZcYDFC3ZtSUdJxBwbwG++7cOnHku6kmKuzzpJyylG+R7U6x619A6U9ZS1wKm276jjMyKGWtXuwT4She2VtjdQNA5eBxxh+75e19XVRfgm4EDgNElXSjq+ps+JGFp1TSCyfa/ttbbvrnJ+XQ2D5/LY7MOI2IyhGDYcETVKEojosBZtQ5YkENEAsfnpuU1IEohoSkoCEd2WhsGIrpuD9QOrSBKIaMKwrCwUETVKEojotpQEIrouSSCi21ISiOiyjBiM6DYxN1uMVZEkENGUlAQiuk1uRxZIEohoQtoEIiK9A1Mmjf/v101H0ZfFN93TdAh92eyGki2nbbdtOoT+9ftrnCQQ0W0pCUR0WX/bkNUqSSCiKSkJRHTX1K7EbZAkENGUjBOI6LaUBCK6LIOFIiK9AxEdlyQQ0WUmDYMRXZeGwYiuSxKI6K4MForoOjttAhFdl96BiI5LdSCiywxMtiMLJAlENKUdOYB5dd1Y0i6SVklaWtdnRAwzudrR8z7SYklflXSZpAslbSNpjaRrJJ3e6/pakoCkZcCXgRcAX5e0ax2fEzHUpnoIeh29nQCcbXsVcDfwOmDM9iHAckn7z3ZxXdWBZwPvsH2dpCXA84BLavqsiKHUR8PgUkk3THs+bnt86ontj0x7b1fgDcAHy+dXAIcCt27p5rUkAduXA0g6nKI0cGYdnxMxrGRQ9YbB9bZX9LyndDCwBLgduLN8eQOw32zX1dkmIOB4YCMzVr2WtFrSDZJueMTDtdx4xMBMVjwqkLQLcA5wMvAAsF351iJ6fM9rSwIunAJcAxw9471x2ytsr9hGC+sKIaLVZFc6et5H2gZYC5xq+w7gRooqAMABFCWDLaqrYfCdkt5YPt0Z+FUdnxMxtNzH0dubgAOB0yRdSTE14URJZwOvpWik36K6GgbHgbWS3gx8F7i0ps+JGFKDmztg+1zg3OmvSfoisAp4r+37Zru+robBe8sAImIL6hw2XH4H11Y5NyMGI5qSWYQRHWbQRJJARLe1IwckCUQ0pUr331xIEohoSpJARIeZyqMB65YkENEAUW004FxIEohoSpJARIcZSBdhRLelOhDRdUkCEV2WzUciui27EkdExglEdFwaBiO6zMBEO4oCSQIRjUjD4KM2TN6z/tIHP31HTbdfCqwf+F1vHvgdp9QTb73qibneRajr+u/8tL7OThIo2K5tdyJJN1RZr70thi1eSMxbJUkgosOyK3FE1xmchsG5MN77lFbpK15JC4AJu/htkjSfovd5B9v3b+GafYB7y9VokbTQLraBKu+H7Y11xdwSzcfcot6B2nYgaoPpmzYOg17xSjqs3H76S5LupNh04guS7pF0EXARcAhwuaSVkj4n6ZOSPivpueVtTgaeO+22F0k6XNLewEnAxyXtLWnfMqlsVcxt1JqYB7cr8VYZ9ZLASLG9TtI/AUcCH7d9IXCepEtsv3rqPEkvo9iYcgI4jWLr6qWSLqXYFm6q5LAv8DCwEDgOeD6wLXAsxe/Gh4HNlihiAFrSMDjSJYER9RDwQtsXSjpI0vXAHZLOk3STpIOAF9n+UXn+eRRbwW0EHplxr7OAHwCXAy+jKCE8k2LvyG9uqUoRg1CxFJCSQEwn6QRgdfFQVwIXA1+h2IX2WmBP4HvAeyRNjWaYoNieeua9jqPYrPI225OSdgBOLN9+OUVJIupiYDJtAtG/C4CVFBu8Xg/8rHx9D8rBL+Vf71cAt1FsTLkA2FQ+nu57wNunPd+OYh/7/YDdaog9ZkpJIPo1rRcA4FTgecA+wF7AT3jsi/4q4OkUX/6dKOr1Uwlh6l7fl7T9tNsvA95cPt4DuKyunyNKLWkTSBIYUrYnJD0E3AEcTtHAd62kecDbKBr3ngMcA/wm8DGKkt+hm71hUZK4qHz8gtoCj4KNJyaajgJIdWDoqCgGCMD294Adga8B/1r++2aKHoD7gTOBMyhG4r8F+CFFw9/Ub5+AeZLGgPuAq8vjlvKzxubiZ+qsSVc7apaSwBCRtA3FF/yCsg//QxRf5LdS1OnXUnzp11LU699t+38lnUVR3N8d+BZFewIU3YFLKRoXf1FeO+X5FL8fn6n1h+qyllQH5JYEEv2T9FTbd057vj3wsO12lDNjixaPLfXBi15Z6dxLNnzixjonPKUkMMSmJ4Dy+UNNxRJPQkv+ACcJRDTELRknkCQQ0YisLBTRbQZa0kWYJBDRAANuyaIiGScQ0QSXi4pUOSqQtEbSNZJO7zeUJIGIhnjSlY5eJB0DjNk+BFguaf9+4sg4gYgGSLqYYqBWFQt5/PrL49MXRpH0L8DFtr8i6VhgR9ufqBpL2gQiGmD7yAHebgdgaszIBoqZoJWlOhAx/B6gGDYOsIg+v9dJAhHD70Yemx16AHB7PxenTSBiyEnaCVhHMYv0KOAg2/dVvj5JIGL4SVoCrAKusn13X9cmCUR0W9oEIjouSSCi45IEIjouSSCi45IEIjru/wGSpOaHRcpmUwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 288x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "confusion_metrix(test_labels,preB)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2. sklearn库实现 准确率为： 0.91"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "准确率为： 0.91\n"
     ]
    }
   ],
   "source": [
    "from sklearn.naive_bayes import MultinomialNB\n",
    "from sklearn import metrics\n",
    "clf = MultinomialNB(alpha=0.001).fit(train_features, train_labels)\n",
    "predicted_labels=clf.predict(test_features)\n",
    "# 计算准确率\n",
    "print('准确率为：', metrics.accuracy_score(test_labels, predicted_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQYAAAD5CAYAAADFhptEAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVdUlEQVR4nO3df5BdZX3H8fdnNyEhCYTEjSERBPkhKtNGJWCIQINjRlD80QhCRWxBzVipVm2nSqEtUkpbdNQWBFwmoNYZJNIatVACiJRgiBjUUn8AUflh+TEahCw/Bkh2v/3jnCV3793Nnnv3nn3Ovft5zZzJ3nPOPve7sPe7z/M9z3mOIgIzs1o9qQMws+pxYjCzBk4MZtbAicHMGjgxmFkDJwYza+DE0EEknSvpKUmPSLpf0p+kjsm6kxND57k4IhYBrwc+LakvdUDWfZwYOlREPAQ8BByYOhbrPk4MHUrSq4B9gC2STpd0r6RfS/pAfny1pC/VnP8NSSflXzecn+//lKSH86HKhyb5R7IKkadEdw5J5wJ/CTwNvBg4F/h34CrgKGAacBfwWiCA/wUWA9OBB4H9gQPGOH878DCwMN/fHxHvnIyfy6pnWuoArGkXR8QnJR0I3JLvOwC4J/96d+CQiLhV0s+Bo4FZwH9HxDOSjh3tfOB7wBbgX4DrgT+ejB/GqslDiQ4VEb8Ebs9ffiUi9o6IvcmGF5vy/dcA7wDeBnw936fRzo+IQeDw/Hv+APiRpN0m5YexynGPoUNJ2htYBtwGnJ6/fhr4H2AV8GPgP/LjO4C/yL/1ZuDj9edLega4EngjcCtZYfNFwCOT9CNNKW86dnY89rvBQufeeddz6yPiuJJDGsGJofP8maTTgSHgq8BFZB/w28n+f34+In4MEBEPS3oEeCQins73/UTS3492vqQNwH35+1wcEU4KJdn6u0G+v36fQudOX/TLSb8k7eJjF5M0DbgUuCkirk4dj+102JIZsfH6lxQ6d+bi++6MiKUlhzSCawzd7VHgFcC6xHFYnQCGiEJbCh5KdLGI8KzIChtiKHUIY3JiMEsgCAYrPIx3YjBLJNUwoQgnBptSJM0HDgN+FBFbU8URwGCFE4OLjxUiaWF+ybDyJM2V9F+Sbszvw6j8ZChJi4BrgSOA70pakCqWALbHUKEtha5NDJLWSNoo6ZzUsRQhaR7wZWB26lgKOhX4bESsJLv6MakTcFp0KPCxiPgHYD3ZPSLJDBXcUujKxCBpFdAbEcuBxZIOTh1TAYPAycBA6kCKiIhLIuLG/OUC4Dcp4ykiIm6KiE2SjiHrNdw+3veUFgvBYMEtha5MDMAKYG3+9c1kdxJWWkQMRMS21HE0S9KRwLyI2DTuyRUgSWQJeDtZMk4jYLDgVkTtMFTSdEn/mfeYzxhr3650a2KYTTbXH7K/wAsTxtK18kLeRcC4v2hVEZkzgY3ACcnioH1DiVGGoR8GNuc95hMk7THGvjF1a2J4iux2YoA5dO/PmUxebFwLnBURD6SOpwhJn5D03vzlXsATCaNhsOAG9EnaXLOtrmusfhi6gp095o3A0jH2jalbPzB3snP4sAS4P10oXet9ZJf9zpZ0i6STUwdUQD9wmqRbgV7ghlSBBDAUxTZga0Qsrdn6R7TVOAwdrcfcVC+6W+cxrAM2SFoMHE92e3JHiIgVqWMoIiIuJbtBq2NExOPAytRxQJYYni/v7/Jwj3kbWY/5qTH2jakrewwRMUDWddoEHNuJRT3rfkOhQlsLRusxN9WL7tYew/Bfh7XjnmiWQDbzsaUPfRFfBq6TdDTwKuD7ZMOI+n1j6soeg1nVBWKQnkJb4TbzYWheDF5Jto7nGyNicLR9u2qrqxPDKNXbSuu0eMExT0SJQwki4uGIWFs7jB5t31i6OjEAlfgFaEKnxQuOuSXDQ4mClysnXdfWGMyqTQxGdf8uJ08MffN7Y/99p5fS9ktfMo2lS2a2fbL5lp/MaXeTAMzUbOb29rV/crzK+6szs2cOc6ctaHvMMVjebOWZzGJPzW97zE/y+NaIKHTHZgDb6W13CG2TPDHsv+907li/b+owmnL8QctTh9AUzZiROoSmDW7riHvJRrhp8OrCM0Aj3GMws1EMJaofFOHEYJZAVnx0j8HMRvBQwszqZLddOzGYWY1APB++KmFmdYY8lDCzWi4+mlmDQAy2eB/EZHBiMEvExUczGyECX640s3ryzEczGymA56O6H7/qRmbWxYLWF2GZDE4MZon4cqWZjZA9V8KJwcxGSLdsWxFODGYJuMdgZqNyj8HMRogQ24eq+/ErrS8jaY2kjZLOKes9zDpVth6DCm0plJIYJK0CeiNiObBY0sFlvI9Z58pWcCqypVDWu65g53Mjb2bnwzSB7ElAkjZL2vzbx8pbJtysqrLiY3lPopqoshLDbLKHaAIMAAtrD0ZEf0QsjYilC15U3VVszMrU7mdXtlNZ1Y+ngN3zr+fQ/Y/CM2tK1adEl/WBvZOdw4clwP0lvY9Zxxqip9CWQlk9hnXABkmLgeOBZSW9j1lHioDtQ9XtSJcSWUQMkBUgNwHHFnnsttlUkg0legptKZQ2wyIiHmfnlQkzq+OZj2Y2wvDlyqqq7iDHrKu1byghaZ6k6yRtkHRZvm9CM4+dGMwSaeOU6NOAr0bE0cAekv6KCc489lDCLIFslejCQ4k+SZtrXvdHRH/N68eAQyTtBewLbKNx5vGWZuJzYjBLIBA7hgrP+t0aEUt3cfw24C3AR4C7gRmMnHl8ULPxeShhlkgbhxIXAB+MiPPIEsO7meDMYycGswTafBPVLOD3JPUCrwP+iQnOPPZQwiyRNk5e+kfgSmA/4Hbgc0xw5rETg1kKbbylOiLuAA6t3SdpBbASuLCVmcdODGYJDK/gVFr7E5x57MRglkiVZz46MZglEMCOCt9d6cRglkDVF2pxYjBLJNUK0EU4MZilEK4x7NK9d83iTfscljqMpmz9wJLUITSl7/I7UofQvKHuXj286rddJ08MZlOVE4OZjRCIQV+VMLN6Lj6a2Qjh4qOZjSacGMxsJE9wMrNRuMdgZiN4HoOZNWpuMdhJ58RglkDgoYSZNXDx0cxGEZE6grE5MZgl4qGEmY0Q4cRgZqNwjcHMGgwNOTGYWY1AHkqYWaMKX5RwYjBLouLFx9KWkJG0UNKGsto363hRcEuglB6DpHnAl4HZZbRv1g2mYo9hEDgZGBjtoKTVkjZL2ryd50oKwazasrkM428plNJjiIgBAGn0jBgR/UA/wJ6aX+UajFkpIiC8GKyZ1fO9EmbWyInBzEaawhOcImJFme2bdbQK9xiqW/0w62b5BKciW1GSLpH01vzrNZI2SjqnlfCcGMxSaeMEJ0lHA3tHxLclrQJ6I2I5sFjSwc2G5sRglkqo2AZ9w/N+8m11bTOSpgOXA/dLejuwAlibH74ZOKrZ0Fx8NEuleI1ha0Qs3cXx9wI/Ay4EPgycCazJjw0ABzUb2i57DJJ6JI06rTk/9q5m39DMyIcJhXsM43kN0B8RjwJfBW4Fds+PzaGFkcF4PYb9gRMl/QCYV3dMwGns7LKYWRPaOMHpF8AB+ddLyT63RwGbgCXAPc02OF5i2EF238PfABuAhcAxwA+BLVT6gotZxbXv07MGuELSKcB0shrDtyQtBo4HljXb4JiJQdI04HxgD2ARcC1wMHAIcAfwPeCwZt/QzHJtmuAUEU8CJ9Xuk7QCWAlcGBHbmm1zvLHHBuD5uvOi7l8za1aAhoptLTUf8XhErM3rDk0bs8cQETsk3QDMBRYAF5EVNBbl27uB37TypmZWuLCYxHg1hn2BH0fEZ+oPSOohG16YWSsq3OfeVY1hBnA28KykN4xySg/wUFmBmXW9TkwMEfEccLykA4ALgN8HPgo8lp8iYEbZAZp1rU5MDMMi4lfAKZJOBB6MiLvLD8usyw1PcKqowjOiIuKaiLhb0uuH9+XDDTNrgaLYlsK4iUHSFkmba3ZdkO//Q+DvygrMrOt1+PLx90fEyprXT0vqBc4C3lJOWNXW98XbU4fQlIE/anriW3J7XrUpdQilS9UbKKJIYghJh5LdK3Fvvu89wDcj4relRWbW7SpcY9jV5crpwIlkU6JfCRxNNrHpcOAW4HOTEJ9Zd0o4TChiVzWGPrK51jsi4hpgW0S8C9gM7AV8pPzwzLpYhWsMYyaGiHgkIs4gm+B0BDBT0gmAIuKvgRMkvXiyAjXrNh19VYIsZ90LfAk4FRi+rWMNcEo5YZlNAZ3YY6ixH9ldlgPAecCsfP96stqDmTVJJd9dOVFFZj4eUvta0qclnRERV0j68/JCM+tyFb4qMd6aj0fmdYUXRMS1wKmS9gK+WGJsZt2twkOJ8XoMPUCvpLuAZ8lunAqy4cXpwHfLDc+se1V5gtN4NYbh0B8jW3vhCeA7wF3Ay8lWpDWzVnRwj+FtwIM0hhkR8adlBmbW1RJeiixizB5DvkLTLLLkANkwovb4qM+bMLOCKtxj2NUEpyHgauDS4V01/wq4TFJfueGZda8qX64suh7DnmTr0+8BHEu2mtMXgQ+WFJeZJTRejaEX2K3+uXmSbo6I2/JVncysFRWuMYyXGL5HXW0hdzlARHy03QGZTQkVLz7uMjFExOAY+68qJxyzKaRTE4OZlciJwcxqiQ4eSrRK0lzga3n7TwEnR8TzZbyXWUeKdJciiyi8fHyTTgU+my8i+yhwXEnvY9a5KjzBqZQeQ0RcUvNyAX74rVmjqTaUGCbpSGBeRGyq278aWA0w84V1X8ymlilXYwCQNB+4CHhn/bGI6Af6AfbU/Ar/5zErUYV/88sqPu4GrAXOiogHyngPs47WwcvHT8T7gMOAsyXdIunkkt7HrGO18yYqSQsl/Sj/eo2kjZLOaTW2soqPl7LzrkwzG0WbawyfAXaXtArojYjlki6RdHBEbGm2sbJ6DGY2nuKXK/skba7ZVtc2I+kNwNNkUwNWkA3jAW4GjmolNM98NEuhuRrD1vo7nIfl9by/Bd4BrANmAw/lhweAg1oJz4nBLAEx+m3LLfgk8IWIeEISZDONd8+PzaHFUYETg1kq7akxvBF4g6QzgVcDLwV+DWwClgD3tNKoE4NZIu0oPkbEMS+0J91CtkbrBkmLyVZdW9ZKuy4+mqUyVHArKCJWRMQAWQFyE3BsRGxrJTT3GMxSKHEFp4h4nJ1XJlrixGCWSoVnPjoxmCUyJW+iMrNxODGYWT33GMxspIrfXenEYJaAqPaaj04MZqm4x2Bm9RTVzQxODGYpuMZgZqPxVYldUE8PPTNnpA6jKUPPPpc6hKbMveaHqUNoWu/L9ksdQvN+1eT5TgxmVs89BjMbqeKPqHNiMEvFPQYzqzUln3ZtZgV4HoOZ1XOPwcxG8gQnMxuNr0qYWQMnBjMbKXDx0cwaufhoZo2cGMyslic4mVmjCNcYzKyRr0qYWQMPJcxspACGqpsZnBjMUqluXqCnrIYlzZe0UlJfWe9h1skUxbYUSkkMkhYB1wJHAN+VtKCM9zHraMNXJsbbEihrKHEo8LGI2CRpHvBaYH1J72XWkaZc8TEibgKQdAxZr+G8Mt7HrFMpQFOx+ChJwMnAdmCw7thqYDXATM0uKwSzamvTPAZJc4GvkX2enyL73F0KvBK4LiLOb7bN0oqPkTkT2AicUHesPyKWRsTS3TSzrBDMKk0RhbYCTgU+GxErgUeBU4DeiFgOLJZ0cLOxldJjkPQJ4JGI+AqwF/BEGe9j1rGaW8GpT9Lmmtf9EdH/QlMRl9QcWwC8B/h8/vpm4ChgSzPhlTWU6AfWSno/8BPghpLex6xDNXXFYWtELB3vJElHAvOA+4GH8t0DwEHNRldW8fFxYGUZbZt1i3ZelZA0H7gIeCfwcWD3/NAcWigZlFZjMLNxtGkeg6TdgLXAWRHxAHAn2fABYAlZD6IpnhJtlkKABtvWZXgfcBhwtqSzgSuB0yQtBo4HljXboBODWSptygsRcSnZ5ckXSPoW2XD+wojY1mybTgxmiRS8FNmSvM63ttXvd2IwS8UrOJnZCEHbZj6WwYnBLAFReFZjEk4MZqk4MZjZCAG073Jl2zkxmCXioYSZNXJiMLOR/MAZM6vnp12b2ag8j8HM6rn4aGYjBTBY3S6DE4NZEi4+7tLA0GNbb3j6Kw+U1HwfsLWktstQTryD458yAeXE/Ku2t1irrN+L/Zo624lhbBFR2lOqJG0uslZeVXRavOCYJ8SJwcxG8NOuzaxRQLj4mEr/+KdUSlPxSpoODEZkv2GSppFdHZ8dEU+O8T0HAI/nK/wgaWZEPFvTHhGxvayYKyJ9zBW/KtHVq0TXPpSjE4wXr6SjJd0o6duSHiJbBPSbkh6TtA5YBywHbpK0QtLXJX1J0tWSXpM3cwbwmppm10k6RtL+wOnAFZL2l3RgnmgmFHMVVSbmKfi0aytBRGyQ9M/AccAVEfEN4DJJ6yPiHcPnSXoz2YNHBoGzyR5h1ifpBrJHBg73MA4EngNmAicBhwMzgBPJfje+AIza87A2qHDxsat7DF3qGeB1EfENScsk3QE8IOkySXdJWga8PiJ+kZ9/GdljArcDz9e1dQHwc+Am4M1kPYlXkD1r9AdjDUesHQr2FtxjsPFIOpXsKeGSdAtwPXAd2ROObwf2AX4KnC/p7vzbBskeU1bf1klkDyO5LyKGJM0GTssPv4Wsx2FlCWDINQZrj6uAFWQPCb4DeDjfvzf5hJ38r/xbgfsAAdOBHfnXtX4KfLTm9e5kzzg8CHhxCbFbPfcYrB1qrj4AnAW8FjgA2Bd4kJ0f/rcDLydLCHuS1QmGk8RwWz+TNKum+UXA+/Ov9wZuLOvnsFyFawxODB0qIgYlPQM8ABxDVkS8XVIP8BGyAuKrgVXAy4DLyXqIR43aYNbjWJd/fURpgVsmghgsd676RHgo0WGUdRcEEBE/BfYAvgP8W/7v+8muPDwJnAecCzwLfBC4h6y4OPwbKaBHUi+wDbgt3+7N36t3Mn6mKWsoim0JuMfQQfKnGm8ErsrnGFxM9uH+EFmNYC1ZIlhLVif4VET8n6QLyIYKC4EfktUnILs02UdWwPxt/r3DDif7/fhaqT/UVFbhoYSiwsHZrkl6SUQ8VPN6FvBcRFS3j2oAzO3tiyPnvK3QuesHrrxzsm/6co+hg9Umhfz1M6lisRZU+I+yE4NZIlHheQxODGZJeAUnM6sXQIUvVzoxmCUQQFR4oRbPYzBLIfKFWopsBUhaI2mjpHPaEZ4Tg1kiMRSFtvFIWgX0RsRyYLGkgycam+cxmCUg6XqyyWVFzCSbvTqsv3axGUn/ClwfEddJOhHYIyKunEh8rjGYJRARx7WxudnA8JyWAbI7ZCfEQwmzzvcU2ZR4gDm04XPtxGDW+e5k512zS4D7J9qgawxmHU7SnsAGsrtrjweWRcS2CbXpxGDW+STNA1YCt0bEoxNuz4nBzOq5xmBmDZwYzKyBE4OZNXBiMLMGTgxm1uD/AfHnfvP9+fd4AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 288x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "confusion_metrix(test_labels,predicted_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:pytorch]",
   "language": "python",
   "name": "conda-env-pytorch-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
